ScheduleDAGRRList.cpp revision b9a3f3d2d395ff32a4f40fe6642d5f0e38a2c0be
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
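// In outline, the bottom-up variant below works roughly like this (a
// simplified sketch using the helpers defined in this file; the handling of
// delayed nodes, backtracking, and copy insertion is omitted):
//
//   ReleasePredecessors(&ExitSU, 0);
//   AvailableQueue->push(RootSU);
//   while (!AvailableQueue->empty()) {
//     SUnit *SU = AvailableQueue->pop();    // highest-priority ready node
//     ScheduleNodeBottomUp(SU, CurCycle);   // emit it and release its preds
//     ++CurCycle;
//   }
//   std::reverse(Sequence.begin(), Sequence.end());
//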
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/Target/TargetRegisterInfo.h"
24#include "llvm/Target/TargetData.h"
25#include "llvm/Target/TargetMachine.h"
26#include "llvm/Target/TargetInstrInfo.h"
27#include "llvm/ADT/PriorityQueue.h"
28#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/raw_ostream.h"
34#include <climits>
35using namespace llvm;
36
37STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
38STATISTIC(NumUnfolds,    "Number of nodes unfolded");
39STATISTIC(NumDups,       "Number of duplicated nodes");
40STATISTIC(NumPRCopies,   "Number of physical register copies");
41
42static RegisterScheduler
43  burrListDAGScheduler("list-burr",
44                       "Bottom-up register reduction list scheduling",
45                       createBURRListDAGScheduler);
46static RegisterScheduler
47  tdrListrDAGScheduler("list-tdrr",
48                       "Top-down register reduction list scheduling",
49                       createTDRRListDAGScheduler);
50static RegisterScheduler
51  sourceListDAGScheduler("source",
52                         "Similar to list-burr but schedules in source "
53                         "order when possible",
54                         createSourceListDAGScheduler);
55
56namespace {
57//===----------------------------------------------------------------------===//
58/// ScheduleDAGRRList - The actual register reduction list scheduler
59/// implementation.  This supports both top-down and bottom-up scheduling.
60///
61class ScheduleDAGRRList : public ScheduleDAGSDNodes {
62private:
63  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
64  /// it is top-down.
65  bool isBottomUp;
66
67  /// AvailableQueue - The priority queue to use for the available SUnits.
68  SchedulingPriorityQueue *AvailableQueue;
69
70  /// LiveRegDefs - A set of physical registers and their defining nodes
71  /// that are "live". These nodes must be scheduled before any other node
72  /// that modifies the registers can be scheduled.
73  unsigned NumLiveRegs;
74  std::vector<SUnit*> LiveRegDefs;
75  std::vector<unsigned> LiveRegCycles;
76
77  /// Topo - A topological ordering for SUnits which permits fast IsReachable
78  /// and similar queries.
79  ScheduleDAGTopologicalSort Topo;
80
81public:
82  ScheduleDAGRRList(MachineFunction &mf,
83                    bool isbottomup,
84                    SchedulingPriorityQueue *availqueue)
85    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
86      AvailableQueue(availqueue), Topo(SUnits) {
87    }
88
89  ~ScheduleDAGRRList() {
90    delete AvailableQueue;
91  }
92
93  void Schedule();
94
95  /// IsReachable - Checks if SU is reachable from TargetSU.
96  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
97    return Topo.IsReachable(SU, TargetSU);
98  }
99
100  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
101  /// create a cycle.
102  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
103    return Topo.WillCreateCycle(SU, TargetSU);
104  }
105
106  /// AddPred - Adds the predecessor edge described by D to SUnit SU,
107  /// making D.getSUnit() a predecessor of SU.
108  /// Updates the topological ordering if required.
109  void AddPred(SUnit *SU, const SDep &D) {
110    Topo.AddPred(SU, D.getSUnit());
111    SU->addPred(D);
112  }
113
114  /// RemovePred - Removes the predecessor edge described by D from SUnit SU,
115  /// so that D.getSUnit() is no longer a predecessor of SU.
116  /// Updates the topological ordering if required.
117  void RemovePred(SUnit *SU, const SDep &D) {
118    Topo.RemovePred(SU, D.getSUnit());
119    SU->removePred(D);
120  }
121
122private:
123  void ReleasePred(SUnit *SU, const SDep *PredEdge);
124  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
125  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
126  void ReleaseSuccessors(SUnit *SU);
127  void CapturePred(SDep *PredEdge);
128  void ScheduleNodeBottomUp(SUnit*, unsigned);
129  void ScheduleNodeTopDown(SUnit*, unsigned);
130  void UnscheduleNodeBottomUp(SUnit*);
131  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
132  SUnit *CopyAndMoveSuccessors(SUnit*);
133  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
134                                const TargetRegisterClass*,
135                                const TargetRegisterClass*,
136                                SmallVector<SUnit*, 2>&);
137  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
138  void ListScheduleTopDown();
139  void ListScheduleBottomUp();
140
141
142  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
143  /// Updates the topological ordering if required.
144  SUnit *CreateNewSUnit(SDNode *N) {
145    unsigned NumSUnits = SUnits.size();
146    SUnit *NewNode = NewSUnit(N);
147    // Update the topological ordering.
148    if (NewNode->NodeNum >= NumSUnits)
149      Topo.InitDAGTopologicalSorting();
150    return NewNode;
151  }
152
153  /// CreateClone - Creates a new SUnit from an existing one.
154  /// Updates the topological ordering if required.
155  SUnit *CreateClone(SUnit *N) {
156    unsigned NumSUnits = SUnits.size();
157    SUnit *NewNode = Clone(N);
158    // Update the topological ordering.
159    if (NewNode->NodeNum >= NumSUnits)
160      Topo.InitDAGTopologicalSorting();
161    return NewNode;
162  }
163
164  /// ForceUnitLatencies - Return true, since register-pressure-reducing
165  /// scheduling doesn't need actual latency information.
166  bool ForceUnitLatencies() const { return true; }
167};
168}  // end anonymous namespace
169
170
171/// Schedule - Schedule the DAG using list scheduling.
172void ScheduleDAGRRList::Schedule() {
173  DEBUG(dbgs() << "********** List Scheduling **********\n");
174
175  NumLiveRegs = 0;
176  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
177  LiveRegCycles.resize(TRI->getNumRegs(), 0);
178
179  // Build the scheduling graph.
180  BuildSchedGraph(NULL);
181
182  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
183          SUnits[su].dumpAll(this));
184  Topo.InitDAGTopologicalSorting();
185
186  AvailableQueue->initNodes(SUnits);
187
188  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
189  if (isBottomUp)
190    ListScheduleBottomUp();
191  else
192    ListScheduleTopDown();
193
194  AvailableQueue->releaseState();
195}
196
197//===----------------------------------------------------------------------===//
198//  Bottom-Up Scheduling
199//===----------------------------------------------------------------------===//
200
201/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
202/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
203void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
204  SUnit *PredSU = PredEdge->getSUnit();
205
206#ifndef NDEBUG
207  if (PredSU->NumSuccsLeft == 0) {
208    dbgs() << "*** Scheduling failed! ***\n";
209    PredSU->dump(this);
210    dbgs() << " has been released too many times!\n";
211    llvm_unreachable(0);
212  }
213#endif
214  --PredSU->NumSuccsLeft;
215
216  // If all the node's successors are scheduled, this node is ready
217  // to be scheduled. Ignore the special EntrySU node.
218  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
219    PredSU->isAvailable = true;
220    AvailableQueue->push(PredSU);
221  }
222}
223
224void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
225  // Bottom up: release predecessors
226  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
227       I != E; ++I) {
228    ReleasePred(SU, &*I);
229    if (I->isAssignedRegDep()) {
230      // This is a physical register dependency and it's impossible or
231      // expensive to copy the register. Make sure nothing that can
232      // clobber the register is scheduled between the predecessor and
233      // this node.
234      if (!LiveRegDefs[I->getReg()]) {
235        ++NumLiveRegs;
236        LiveRegDefs[I->getReg()] = I->getSUnit();
237        LiveRegCycles[I->getReg()] = CurCycle;
238      }
239    }
240  }
241}
242
243/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
244/// count of its predecessors. If a predecessor pending count is zero, add it to
245/// the Available queue.
246void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
247  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
248  DEBUG(SU->dump(this));
249
250  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
251  SU->setHeightToAtLeast(CurCycle);
252  Sequence.push_back(SU);
253
254  ReleasePredecessors(SU, CurCycle);
255
256  // Release all the implicit physical register defs that are live.
257  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
258       I != E; ++I) {
259    if (I->isAssignedRegDep()) {
260      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
261        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
262        assert(LiveRegDefs[I->getReg()] == SU &&
263               "Physical register dependency violated?");
264        --NumLiveRegs;
265        LiveRegDefs[I->getReg()] = NULL;
266        LiveRegCycles[I->getReg()] = 0;
267      }
268    }
269  }
270
271  SU->isScheduled = true;
272  AvailableQueue->ScheduledNode(SU);
273}
274
275/// CapturePred - This does the opposite of ReleasePred. Since SU is being
276/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
277/// them from AvailableQueue if necessary.
278void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
279  SUnit *PredSU = PredEdge->getSUnit();
280  if (PredSU->isAvailable) {
281    PredSU->isAvailable = false;
282    if (!PredSU->isPending)
283      AvailableQueue->remove(PredSU);
284  }
285
286  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
287  ++PredSU->NumSuccsLeft;
288}
289
290/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
291/// state and its predecessors' states to reflect the change.
292void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
293  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
294  DEBUG(SU->dump(this));
295
296  AvailableQueue->UnscheduledNode(SU);
297
298  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
299       I != E; ++I) {
300    CapturePred(&*I);
301    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
302      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
303      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
304             "Physical register dependency violated?");
305      --NumLiveRegs;
306      LiveRegDefs[I->getReg()] = NULL;
307      LiveRegCycles[I->getReg()] = 0;
308    }
309  }
310
311  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
312       I != E; ++I) {
313    if (I->isAssignedRegDep()) {
314      if (!LiveRegDefs[I->getReg()]) {
315        LiveRegDefs[I->getReg()] = SU;
316        ++NumLiveRegs;
317      }
318      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
319        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
320    }
321  }
322
323  SU->setHeightDirty();
324  SU->isScheduled = false;
325  SU->isAvailable = true;
326  AvailableQueue->push(SU);
327}
328
329/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
330/// BtCycle in order to schedule a specific node.
331void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
332                                          unsigned &CurCycle) {
333  SUnit *OldSU = NULL;
334  while (CurCycle > BtCycle) {
335    OldSU = Sequence.back();
336    Sequence.pop_back();
337    if (SU->isSucc(OldSU))
338      // Don't try to remove SU from AvailableQueue.
339      SU->isAvailable = false;
340    UnscheduleNodeBottomUp(OldSU);
341    --CurCycle;
342  }
343
344  assert(!SU->isSucc(OldSU) && "Something is wrong!");
345
346  ++NumBacktracks;
347}
348
349static bool isOperandOf(const SUnit *SU, SDNode *N) {
350  for (const SDNode *SUNode = SU->getNode(); SUNode;
351       SUNode = SUNode->getFlaggedNode()) {
352    if (SUNode->isOperandOf(N))
353      return true;
354  }
355  return false;
356}
357
358/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
359/// successors to the newly created node.
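///
/// For example, if the node is a load-folded operation (schematically
/// "x = op [mem]", a hypothetical folded form), the TryUnfold path below
/// splits it back into
///   load = [mem]         (LoadSU)
///   x    = op load       (NewSU)
/// so the two halves can be scheduled separately; when unfolding is not
/// possible or not sufficient, the node is cloned and its already-scheduled
/// successors are moved onto the clone.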
360SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
361  if (SU->getNode()->getFlaggedNode())
362    return NULL;
363
364  SDNode *N = SU->getNode();
365  if (!N)
366    return NULL;
367
368  SUnit *NewSU;
369  bool TryUnfold = false;
370  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
371    EVT VT = N->getValueType(i);
372    if (VT == MVT::Flag)
373      return NULL;
374    else if (VT == MVT::Other)
375      TryUnfold = true;
376  }
377  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
378    const SDValue &Op = N->getOperand(i);
379    EVT VT = Op.getNode()->getValueType(Op.getResNo());
380    if (VT == MVT::Flag)
381      return NULL;
382  }
383
384  if (TryUnfold) {
385    SmallVector<SDNode*, 2> NewNodes;
386    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
387      return NULL;
388
389    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
390    assert(NewNodes.size() == 2 && "Expected a load folding node!");
391
392    N = NewNodes[1];
393    SDNode *LoadNode = NewNodes[0];
394    unsigned NumVals = N->getNumValues();
395    unsigned OldNumVals = SU->getNode()->getNumValues();
396    for (unsigned i = 0; i != NumVals; ++i)
397      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
398    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
399                                   SDValue(LoadNode, 1));
400
401    // LoadNode may already exist. This can happen when there is another
402    // load from the same location that produces the same type of value
403    // but has different alignment or volatility.
404    bool isNewLoad = true;
405    SUnit *LoadSU;
406    if (LoadNode->getNodeId() != -1) {
407      LoadSU = &SUnits[LoadNode->getNodeId()];
408      isNewLoad = false;
409    } else {
410      LoadSU = CreateNewSUnit(LoadNode);
411      LoadNode->setNodeId(LoadSU->NodeNum);
412      ComputeLatency(LoadSU);
413    }
414
415    SUnit *NewSU = CreateNewSUnit(N);
416    assert(N->getNodeId() == -1 && "Node already inserted!");
417    N->setNodeId(NewSU->NodeNum);
418
419    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
420    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
421      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
422        NewSU->isTwoAddress = true;
423        break;
424      }
425    }
426    if (TID.isCommutable())
427      NewSU->isCommutable = true;
428    ComputeLatency(NewSU);
429
430    // Record all the edges to and from the old SU, by category.
431    SmallVector<SDep, 4> ChainPreds;
432    SmallVector<SDep, 4> ChainSuccs;
433    SmallVector<SDep, 4> LoadPreds;
434    SmallVector<SDep, 4> NodePreds;
435    SmallVector<SDep, 4> NodeSuccs;
436    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
437         I != E; ++I) {
438      if (I->isCtrl())
439        ChainPreds.push_back(*I);
440      else if (isOperandOf(I->getSUnit(), LoadNode))
441        LoadPreds.push_back(*I);
442      else
443        NodePreds.push_back(*I);
444    }
445    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
446         I != E; ++I) {
447      if (I->isCtrl())
448        ChainSuccs.push_back(*I);
449      else
450        NodeSuccs.push_back(*I);
451    }
452
453    // Now assign edges to the newly-created nodes.
454    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
455      const SDep &Pred = ChainPreds[i];
456      RemovePred(SU, Pred);
457      if (isNewLoad)
458        AddPred(LoadSU, Pred);
459    }
460    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
461      const SDep &Pred = LoadPreds[i];
462      RemovePred(SU, Pred);
463      if (isNewLoad)
464        AddPred(LoadSU, Pred);
465    }
466    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
467      const SDep &Pred = NodePreds[i];
468      RemovePred(SU, Pred);
469      AddPred(NewSU, Pred);
470    }
471    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
472      SDep D = NodeSuccs[i];
473      SUnit *SuccDep = D.getSUnit();
474      D.setSUnit(SU);
475      RemovePred(SuccDep, D);
476      D.setSUnit(NewSU);
477      AddPred(SuccDep, D);
478    }
479    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
480      SDep D = ChainSuccs[i];
481      SUnit *SuccDep = D.getSUnit();
482      D.setSUnit(SU);
483      RemovePred(SuccDep, D);
484      if (isNewLoad) {
485        D.setSUnit(LoadSU);
486        AddPred(SuccDep, D);
487      }
488    }
489
490    // Add a data dependency to reflect that NewSU reads the value defined
491    // by LoadSU.
492    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
493
494    if (isNewLoad)
495      AvailableQueue->addNode(LoadSU);
496    AvailableQueue->addNode(NewSU);
497
498    ++NumUnfolds;
499
500    if (NewSU->NumSuccsLeft == 0) {
501      NewSU->isAvailable = true;
502      return NewSU;
503    }
504    SU = NewSU;
505  }
506
507  DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
508  NewSU = CreateClone(SU);
509
510  // New SUnit has the exact same predecessors.
511  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
512       I != E; ++I)
513    if (!I->isArtificial())
514      AddPred(NewSU, *I);
515
516  // Only copy scheduled successors. Cut them from the old node's successor
517  // list and move them over.
518  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
519  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
520       I != E; ++I) {
521    if (I->isArtificial())
522      continue;
523    SUnit *SuccSU = I->getSUnit();
524    if (SuccSU->isScheduled) {
525      SDep D = *I;
526      D.setSUnit(NewSU);
527      AddPred(SuccSU, D);
528      D.setSUnit(SU);
529      DelDeps.push_back(std::make_pair(SuccSU, D));
530    }
531  }
532  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
533    RemovePred(DelDeps[i].first, DelDeps[i].second);
534
535  AvailableQueue->updateNode(SU);
536  AvailableQueue->addNode(NewSU);
537
538  ++NumDups;
539  return NewSU;
540}
541
542/// InsertCopiesAndMoveSuccs - Insert register copies and move all
543/// scheduled successors of the given SUnit to the last copy.
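///
/// Schematically, the result is the chain
///   SU --> CopyFromSU (SrcRC -> DestRC) --> CopyToSU (DestRC -> SrcRC)
/// with the already-scheduled successors of SU rerouted to depend on
/// CopyToSU; the copy instructions themselves are emitted later from the
/// CopySrcRC/CopyDstRC information recorded here.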
544void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
545                                               const TargetRegisterClass *DestRC,
546                                               const TargetRegisterClass *SrcRC,
547                                               SmallVector<SUnit*, 2> &Copies) {
548  SUnit *CopyFromSU = CreateNewSUnit(NULL);
549  CopyFromSU->CopySrcRC = SrcRC;
550  CopyFromSU->CopyDstRC = DestRC;
551
552  SUnit *CopyToSU = CreateNewSUnit(NULL);
553  CopyToSU->CopySrcRC = DestRC;
554  CopyToSU->CopyDstRC = SrcRC;
555
556  // Only copy scheduled successors. Cut them from the old node's successor
557  // list and move them over.
558  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
559  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
560       I != E; ++I) {
561    if (I->isArtificial())
562      continue;
563    SUnit *SuccSU = I->getSUnit();
564    if (SuccSU->isScheduled) {
565      SDep D = *I;
566      D.setSUnit(CopyToSU);
567      AddPred(SuccSU, D);
568      DelDeps.push_back(std::make_pair(SuccSU, *I));
569    }
570  }
571  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
572    RemovePred(DelDeps[i].first, DelDeps[i].second);
573
574  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
575  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
576
577  AvailableQueue->updateNode(SU);
578  AvailableQueue->addNode(CopyFromSU);
579  AvailableQueue->addNode(CopyToSU);
580  Copies.push_back(CopyFromSU);
581  Copies.push_back(CopyToSU);
582
583  ++NumPRCopies;
584}
585
586/// getPhysicalRegisterVT - Returns the ValueType of the physical register
587/// definition of the specified node.
588/// FIXME: Move to SelectionDAG?
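/// For instance, if a node has 2 explicit defs and implicit defs { R0, R1 }
/// (hypothetical registers), then for Reg == R1 the loop below stops with
/// NumRes == 3 and the function returns the node's value type #3.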
589static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
590                                 const TargetInstrInfo *TII) {
591  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
592  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
593  unsigned NumRes = TID.getNumDefs();
594  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
595    if (Reg == *ImpDef)
596      break;
597    ++NumRes;
598  }
599  return N->getValueType(NumRes);
600}
601
602/// CheckForLiveRegDef - Return true and append to LRegs if the specified
603/// register def of the specified SUnit clobbers any "live" registers.
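/// For example, if two independent sub-DAGs each contain a node that
/// implicitly defines the same physical register (say a hypothetical
/// condition-code register), scheduling the second definition while the
/// first is still "live" would clobber it; this check reports the
/// interference so the caller can delay the node.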
604static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
605                               std::vector<SUnit*> &LiveRegDefs,
606                               SmallSet<unsigned, 4> &RegAdded,
607                               SmallVector<unsigned, 4> &LRegs,
608                               const TargetRegisterInfo *TRI) {
609  bool Added = false;
610  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
611    if (RegAdded.insert(Reg)) {
612      LRegs.push_back(Reg);
613      Added = true;
614    }
615  }
616  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
617    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
618      if (RegAdded.insert(*Alias)) {
619        LRegs.push_back(*Alias);
620        Added = true;
621      }
622    }
623  return Added;
624}
625
626/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
627/// scheduling of the given node to satisfy live physical register
628/// dependencies, recording the interfering registers in LRegs. The caller then
629/// does whatever is necessary (e.g. backtracking or cloning) to make it possible.
630bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
631                                                 SmallVector<unsigned, 4> &LRegs){
632  if (NumLiveRegs == 0)
633    return false;
634
635  SmallSet<unsigned, 4> RegAdded;
636  // If this node would clobber any "live" register, then it's not ready.
637  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
638       I != E; ++I) {
639    if (I->isAssignedRegDep())
640      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
641                         RegAdded, LRegs, TRI);
642  }
643
644  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
645    if (Node->getOpcode() == ISD::INLINEASM) {
646      // Inline asm can clobber physical defs.
647      unsigned NumOps = Node->getNumOperands();
648      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
649        --NumOps;  // Ignore the flag operand.
650
651      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
652        unsigned Flags =
653          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
654        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
655
656        ++i; // Skip the ID value.
657        if (InlineAsm::isRegDefKind(Flags) ||
658            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
659          // Check for def of register or earlyclobber register.
660          for (; NumVals; --NumVals, ++i) {
661            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
662            if (TargetRegisterInfo::isPhysicalRegister(Reg))
663              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
664          }
665        } else
666          i += NumVals;
667      }
668      continue;
669    }
670
671    if (!Node->isMachineOpcode())
672      continue;
673    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
674    if (!TID.ImplicitDefs)
675      continue;
676    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
677      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
678  }
679  return !LRegs.empty();
680}
681
682
683/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
684/// schedulers.
685void ScheduleDAGRRList::ListScheduleBottomUp() {
686  unsigned CurCycle = 0;
687
688  // Release any predecessors of the special Exit node.
689  ReleasePredecessors(&ExitSU, CurCycle);
690
691  // Add root to Available queue.
692  if (!SUnits.empty()) {
693    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
694    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
695    RootSU->isAvailable = true;
696    AvailableQueue->push(RootSU);
697  }
698
699  // While Available queue is not empty, grab the node with the highest
700  // priority. If it is not ready put it back.  Schedule the node.
701  SmallVector<SUnit*, 4> NotReady;
702  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
703  Sequence.reserve(SUnits.size());
704  while (!AvailableQueue->empty()) {
705    bool Delayed = false;
706    LRegsMap.clear();
707    SUnit *CurSU = AvailableQueue->pop();
708    while (CurSU) {
709      SmallVector<unsigned, 4> LRegs;
710      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
711        break;
712      Delayed = true;
713      LRegsMap.insert(std::make_pair(CurSU, LRegs));
714
715      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
716      NotReady.push_back(CurSU);
717      CurSU = AvailableQueue->pop();
718    }
719
720    // All candidates are delayed due to live physical reg dependencies.
721    // Try backtracking, code duplication, or inserting cross class copies
722    // to resolve it.
723    if (Delayed && !CurSU) {
724      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
725        SUnit *TrySU = NotReady[i];
726        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
727
728        // Try unscheduling up to the point where it's safe to schedule
729        // this node.
730        unsigned LiveCycle = CurCycle;
731        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
732          unsigned Reg = LRegs[j];
733          unsigned LCycle = LiveRegCycles[Reg];
734          LiveCycle = std::min(LiveCycle, LCycle);
735        }
736        SUnit *OldSU = Sequence[LiveCycle];
737        if (!WillCreateCycle(TrySU, OldSU))  {
738          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
739          // Force the current node to be scheduled before the node that
740          // requires the physical reg dep.
741          if (OldSU->isAvailable) {
742            OldSU->isAvailable = false;
743            AvailableQueue->remove(OldSU);
744          }
745          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
746                              /*Reg=*/0, /*isNormalMemory=*/false,
747                              /*isMustAlias=*/false, /*isArtificial=*/true));
748          // If one or more successors have been unscheduled, then the current
749          // node is no longer available. Schedule a successor that's now
750          // available instead.
751          if (!TrySU->isAvailable)
752            CurSU = AvailableQueue->pop();
753          else {
754            CurSU = TrySU;
755            TrySU->isPending = false;
756            NotReady.erase(NotReady.begin()+i);
757          }
758          break;
759        }
760      }
761
762      if (!CurSU) {
763        // Can't backtrack. If it's too expensive to copy the value, then try to
764        // duplicate the nodes that produce these "too expensive to copy"
765        // values to break the dependency. In case even that doesn't work,
766        // insert cross class copies.
767        // If it's not too expensive, i.e. cost != -1, issue copies.
768        SUnit *TrySU = NotReady[0];
769        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
770        assert(LRegs.size() == 1 && "Can't handle this yet!");
771        unsigned Reg = LRegs[0];
772        SUnit *LRDef = LiveRegDefs[Reg];
773        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
774        const TargetRegisterClass *RC =
775          TRI->getPhysicalRegisterRegClass(Reg, VT);
776        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
777
778        // If the cross copy register class is null, then it must be possible to
779        // copy the value directly. Do not try to duplicate the def.
780        SUnit *NewDef = 0;
781        if (DestRC)
782          NewDef = CopyAndMoveSuccessors(LRDef);
783        else
784          DestRC = RC;
785        if (!NewDef) {
786          // Issue copies; these can be expensive cross register class copies.
787          SmallVector<SUnit*, 2> Copies;
788          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
789          DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
790                       << " to SU #" << Copies.front()->NodeNum << "\n");
791          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
792                              /*Reg=*/0, /*isNormalMemory=*/false,
793                              /*isMustAlias=*/false,
794                              /*isArtificial=*/true));
795          NewDef = Copies.back();
796        }
797
798        DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
799                     << " to SU #" << TrySU->NodeNum << "\n");
800        LiveRegDefs[Reg] = NewDef;
801        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
802                             /*Reg=*/0, /*isNormalMemory=*/false,
803                             /*isMustAlias=*/false,
804                             /*isArtificial=*/true));
805        TrySU->isAvailable = false;
806        CurSU = NewDef;
807      }
808
809      assert(CurSU && "Unable to resolve live physical register dependencies!");
810    }
811
812    // Add the nodes that aren't ready back onto the available list.
813    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
814      NotReady[i]->isPending = false;
815      // May no longer be available due to backtracking.
816      if (NotReady[i]->isAvailable)
817        AvailableQueue->push(NotReady[i]);
818    }
819    NotReady.clear();
820
821    if (CurSU)
822      ScheduleNodeBottomUp(CurSU, CurCycle);
823    ++CurCycle;
824  }
825
826  // Reverse the order, since the sequence was built bottom-up.
827  std::reverse(Sequence.begin(), Sequence.end());
828
829#ifndef NDEBUG
830  VerifySchedule(isBottomUp);
831#endif
832}
833
834//===----------------------------------------------------------------------===//
835//  Top-Down Scheduling
836//===----------------------------------------------------------------------===//
837
838/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
839/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
840void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
841  SUnit *SuccSU = SuccEdge->getSUnit();
842
843#ifndef NDEBUG
844  if (SuccSU->NumPredsLeft == 0) {
845    dbgs() << "*** Scheduling failed! ***\n";
846    SuccSU->dump(this);
847    dbgs() << " has been released too many times!\n";
848    llvm_unreachable(0);
849  }
850#endif
851  --SuccSU->NumPredsLeft;
852
853  // If all the node's predecessors are scheduled, this node is ready
854  // to be scheduled. Ignore the special ExitSU node.
855  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
856    SuccSU->isAvailable = true;
857    AvailableQueue->push(SuccSU);
858  }
859}
860
861void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
862  // Top down: release successors
863  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
864       I != E; ++I) {
865    assert(!I->isAssignedRegDep() &&
866           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
867
868    ReleaseSucc(SU, &*I);
869  }
870}
871
872/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
873/// count of its successors. If a successor pending count is zero, add it to
874/// the Available queue.
875void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
876  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
877  DEBUG(SU->dump(this));
878
879  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
880  SU->setDepthToAtLeast(CurCycle);
881  Sequence.push_back(SU);
882
883  ReleaseSuccessors(SU);
884  SU->isScheduled = true;
885  AvailableQueue->ScheduledNode(SU);
886}
887
888/// ListScheduleTopDown - The main loop of list scheduling for top-down
889/// schedulers.
890void ScheduleDAGRRList::ListScheduleTopDown() {
891  unsigned CurCycle = 0;
892
893  // Release any successors of the special Entry node.
894  ReleaseSuccessors(&EntrySU);
895
896  // All leaves to Available queue.
897  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
898    // It is available if it has no predecessors.
899    if (SUnits[i].Preds.empty()) {
900      AvailableQueue->push(&SUnits[i]);
901      SUnits[i].isAvailable = true;
902    }
903  }
904
905  // While Available queue is not empty, grab the node with the highest
906  // priority. If it is not ready put it back.  Schedule the node.
907  Sequence.reserve(SUnits.size());
908  while (!AvailableQueue->empty()) {
909    SUnit *CurSU = AvailableQueue->pop();
910
911    if (CurSU)
912      ScheduleNodeTopDown(CurSU, CurCycle);
913    ++CurCycle;
914  }
915
916#ifndef NDEBUG
917  VerifySchedule(isBottomUp);
918#endif
919}
920
921
922//===----------------------------------------------------------------------===//
923//                RegReductionPriorityQueue Implementation
924//===----------------------------------------------------------------------===//
925//
926// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
927// to reduce register pressure.
928//
929namespace {
930  template<class SF>
931  class RegReductionPriorityQueue;
932
933  /// Sorting functions for the Available queue.
934  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
935    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
936    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
937    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
938
939    bool operator()(const SUnit* left, const SUnit* right) const;
940  };
941
942  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
943    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
944    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
945    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
946
947    bool operator()(const SUnit* left, const SUnit* right) const;
948  };
949
950  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
951    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
952    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
953      : SPQ(spq) {}
954    src_ls_rr_sort(const src_ls_rr_sort &RHS)
955      : SPQ(RHS.SPQ) {}
956
957    bool operator()(const SUnit* left, const SUnit* right) const;
958  };
959}  // end anonymous namespace
960
961/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number for a node.
962/// A smaller number means a higher priority.
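/// A worked example (data preds only): a leaf gets 1; a node whose two
/// operands both have number 1 takes the maximum (1) plus one for the tie,
/// giving 2; a node with operand numbers 2 and 1 just takes the maximum, 2.
/// Roughly, subtrees that need more registers to evaluate get larger numbers.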
963static unsigned
964CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
965  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
966  if (SethiUllmanNumber != 0)
967    return SethiUllmanNumber;
968
969  unsigned Extra = 0;
970  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
971       I != E; ++I) {
972    if (I->isCtrl()) continue;  // ignore chain preds
973    SUnit *PredSU = I->getSUnit();
974    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
975    if (PredSethiUllman > SethiUllmanNumber) {
976      SethiUllmanNumber = PredSethiUllman;
977      Extra = 0;
978    } else if (PredSethiUllman == SethiUllmanNumber)
979      ++Extra;
980  }
981
982  SethiUllmanNumber += Extra;
983
984  if (SethiUllmanNumber == 0)
985    SethiUllmanNumber = 1;
986
987  return SethiUllmanNumber;
988}
989
990namespace {
991  template<class SF>
992  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
993    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
994    unsigned currentQueueId;
995
996  protected:
997    // SUnits - The SUnits for the current graph.
998    std::vector<SUnit> *SUnits;
999
1000    const TargetInstrInfo *TII;
1001    const TargetRegisterInfo *TRI;
1002    ScheduleDAGRRList *scheduleDAG;
1003
1004    // SethiUllmanNumbers - The SethiUllman number for each node.
1005    std::vector<unsigned> SethiUllmanNumbers;
1006
1007  public:
1008    RegReductionPriorityQueue(const TargetInstrInfo *tii,
1009                              const TargetRegisterInfo *tri)
1010      : Queue(SF(this)), currentQueueId(0),
1011        TII(tii), TRI(tri), scheduleDAG(NULL) {}
1012
1013    void initNodes(std::vector<SUnit> &sunits) {
1014      SUnits = &sunits;
1015      // Add pseudo dependency edges for two-address nodes.
1016      AddPseudoTwoAddrDeps();
1017      // Reroute edges to nodes with multiple uses.
1018      PrescheduleNodesWithMultipleUses();
1019      // Calculate node priorities.
1020      CalculateSethiUllmanNumbers();
1021    }
1022
1023    void addNode(const SUnit *SU) {
1024      unsigned SUSize = SethiUllmanNumbers.size();
1025      if (SUnits->size() > SUSize)
1026        SethiUllmanNumbers.resize(SUSize*2, 0);
1027      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1028    }
1029
1030    void updateNode(const SUnit *SU) {
1031      SethiUllmanNumbers[SU->NodeNum] = 0;
1032      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1033    }
1034
1035    void releaseState() {
1036      SUnits = 0;
1037      SethiUllmanNumbers.clear();
1038    }
1039
1040    unsigned getNodePriority(const SUnit *SU) const {
1041      assert(SU->NodeNum < SethiUllmanNumbers.size());
1042      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1043      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1044        // CopyToReg should be close to its uses to facilitate coalescing and
1045        // avoid spilling.
1046        return 0;
1047      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1048          Opc == TargetOpcode::SUBREG_TO_REG ||
1049          Opc == TargetOpcode::INSERT_SUBREG)
1050        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1051        // close to their uses to facilitate coalescing.
1052        return 0;
1053      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1054        // If SU does not have a register use, i.e. it doesn't produce a value
1055        // that would be consumed (e.g. store), then it terminates a chain of
1056        // computation.  Give it a large SethiUllman number so it will be
1057        // scheduled right before its predecessors, so that it doesn't lengthen
1058        // their live ranges.
1059        return 0xffff;
1060      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1061        // If SU does not have a register def, schedule it close to its uses
1062        // because it does not lengthen any live ranges.
1063        return 0;
1064      return SethiUllmanNumbers[SU->NodeNum];
1065    }
1066
1067    unsigned getNodeOrdering(const SUnit *SU) const {
1068      return scheduleDAG->DAG->GetOrdering(SU->getNode());
1069    }
1070
1071    unsigned size() const { return Queue.size(); }
1072
1073    bool empty() const { return Queue.empty(); }
1074
1075    void push(SUnit *U) {
1076      assert(!U->NodeQueueId && "Node in the queue already");
1077      U->NodeQueueId = ++currentQueueId;
1078      Queue.push(U);
1079    }
1080
1081    void push_all(const std::vector<SUnit *> &Nodes) {
1082      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
1083        push(Nodes[i]);
1084    }
1085
1086    SUnit *pop() {
1087      if (empty()) return NULL;
1088      SUnit *V = Queue.top();
1089      Queue.pop();
1090      V->NodeQueueId = 0;
1091      return V;
1092    }
1093
1094    void remove(SUnit *SU) {
1095      assert(!Queue.empty() && "Queue is empty!");
1096      assert(SU->NodeQueueId != 0 && "Not in queue!");
1097      Queue.erase_one(SU);
1098      SU->NodeQueueId = 0;
1099    }
1100
1101    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1102      scheduleDAG = scheduleDag;
1103    }
1104
1105  protected:
1106    bool canClobber(const SUnit *SU, const SUnit *Op);
1107    void AddPseudoTwoAddrDeps();
1108    void PrescheduleNodesWithMultipleUses();
1109    void CalculateSethiUllmanNumbers();
1110  };
1111
1112  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1113    BURegReductionPriorityQueue;
1114
1115  typedef RegReductionPriorityQueue<td_ls_rr_sort>
1116    TDRegReductionPriorityQueue;
1117
1118  typedef RegReductionPriorityQueue<src_ls_rr_sort>
1119    SrcRegReductionPriorityQueue;
1120}
1121
1122/// closestSucc - Returns the scheduled cycle of the successor which is
1123/// closest to the current cycle.
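/// For example, if SU's data successors were scheduled at heights (cycles)
/// 2, 5 and 3, this returns 5: the successor scheduled most recently, and
/// therefore the one closest to the current cycle.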
1124static unsigned closestSucc(const SUnit *SU) {
1125  unsigned MaxHeight = 0;
1126  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1127       I != E; ++I) {
1128    if (I->isCtrl()) continue;  // ignore chain succs
1129    unsigned Height = I->getSUnit()->getHeight();
1130    // If there are a bunch of CopyToRegs stacked up, they should be considered
1131    // to be at the same position.
1132    if (I->getSUnit()->getNode() &&
1133        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1134      Height = closestSucc(I->getSUnit())+1;
1135    if (Height > MaxHeight)
1136      MaxHeight = Height;
1137  }
1138  return MaxHeight;
1139}
1140
1141/// calcMaxScratches - Returns a cost estimate of the worst case requirement
1142/// for scratch registers, i.e. the number of data dependencies.
1143static unsigned calcMaxScratches(const SUnit *SU) {
1144  unsigned Scratches = 0;
1145  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1146       I != E; ++I) {
1147    if (I->isCtrl()) continue;  // ignore chain preds
1148    Scratches++;
1149  }
1150  return Scratches;
1151}
1152
1153template <typename RRSort>
1154static bool BURRSort(const SUnit *left, const SUnit *right,
1155                     const RegReductionPriorityQueue<RRSort> *SPQ) {
1156  unsigned LPriority = SPQ->getNodePriority(left);
1157  unsigned RPriority = SPQ->getNodePriority(right);
1158  if (LPriority != RPriority)
1159    return LPriority > RPriority;
1160
1161  // Try to schedule def + use closer together when Sethi-Ullman numbers are the same.
1162  // e.g.
1163  // t1 = op t2, c1
1164  // t3 = op t4, c2
1165  //
1166  // and the following instructions are both ready.
1167  // t2 = op c3
1168  // t4 = op c4
1169  //
1170  // Then schedule t2 = op first.
1171  // i.e.
1172  // t4 = op c4
1173  // t2 = op c3
1174  // t1 = op t2, c1
1175  // t3 = op t4, c2
1176  //
1177  // This creates more short live intervals.
1178  unsigned LDist = closestSucc(left);
1179  unsigned RDist = closestSucc(right);
1180  if (LDist != RDist)
1181    return LDist < RDist;
1182
1183  // How many registers become live when the node is scheduled.
1184  unsigned LScratch = calcMaxScratches(left);
1185  unsigned RScratch = calcMaxScratches(right);
1186  if (LScratch != RScratch)
1187    return LScratch > RScratch;
1188
1189  if (left->getHeight() != right->getHeight())
1190    return left->getHeight() > right->getHeight();
1191
1192  if (left->getDepth() != right->getDepth())
1193    return left->getDepth() < right->getDepth();
1194
1195  assert(left->NodeQueueId && right->NodeQueueId &&
1196         "NodeQueueId cannot be zero");
1197  return (left->NodeQueueId > right->NodeQueueId);
1198}
1199
1200// Bottom up
1201bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1202  return BURRSort(left, right, SPQ);
1203}
1204
1205// Source order, otherwise bottom up.
1206bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
1207  unsigned LOrder = SPQ->getNodeOrdering(left);
1208  unsigned ROrder = SPQ->getNodeOrdering(right);
1209
1210  // Prefer to schedule in source order: the lower the non-zero order number,
1211  // the higher the preference.
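  // For instance, with LOrder == 3 and ROrder == 7 this returns true, so the
  // right node is popped from the queue first; because the bottom-up sequence
  // is reversed at the end, the node with source order 3 ends up earlier in
  // the final schedule, matching the source.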
1212  if ((LOrder || ROrder) && LOrder != ROrder)
1213    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
1214
1215  return BURRSort(left, right, SPQ);
1216}
1217
1218template<class SF>
1219bool
1220RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1221  if (SU->isTwoAddress) {
1222    unsigned Opc = SU->getNode()->getMachineOpcode();
1223    const TargetInstrDesc &TID = TII->get(Opc);
1224    unsigned NumRes = TID.getNumDefs();
1225    unsigned NumOps = TID.getNumOperands() - NumRes;
1226    for (unsigned i = 0; i != NumOps; ++i) {
1227      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1228        SDNode *DU = SU->getNode()->getOperand(i).getNode();
1229        if (DU->getNodeId() != -1 &&
1230            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1231          return true;
1232      }
1233    }
1234  }
1235  return false;
1236}
1237
1238/// hasCopyToRegUse - Return true if SU has a value successor that is a
1239/// CopyToReg node.
1240static bool hasCopyToRegUse(const SUnit *SU) {
1241  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1242       I != E; ++I) {
1243    if (I->isCtrl()) continue;
1244    const SUnit *SuccSU = I->getSUnit();
1245    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
1246      return true;
1247  }
1248  return false;
1249}
1250
1251/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1252/// physical register defs.
1253static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1254                                  const TargetInstrInfo *TII,
1255                                  const TargetRegisterInfo *TRI) {
1256  SDNode *N = SuccSU->getNode();
1257  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1258  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1259  assert(ImpDefs && "Caller should check hasPhysRegDefs");
1260  for (const SDNode *SUNode = SU->getNode(); SUNode;
1261       SUNode = SUNode->getFlaggedNode()) {
1262    if (!SUNode->isMachineOpcode())
1263      continue;
1264    const unsigned *SUImpDefs =
1265      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
1266    if (!SUImpDefs)
1267      return false;
1268    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1269      EVT VT = N->getValueType(i);
1270      if (VT == MVT::Flag || VT == MVT::Other)
1271        continue;
1272      if (!N->hasAnyUseOfValue(i))
1273        continue;
1274      unsigned Reg = ImpDefs[i - NumDefs];
1275      for (;*SUImpDefs; ++SUImpDefs) {
1276        unsigned SUReg = *SUImpDefs;
1277        if (TRI->regsOverlap(Reg, SUReg))
1278          return true;
1279      }
1280    }
1281  }
1282  return false;
1283}
1284
1285/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1286/// are not handled well by the general register pressure reduction
1287/// heuristics. When presented with code like this:
1288///
1289///      N
1290///    / |
1291///   /  |
1292///  U  store
1293///  |
1294/// ...
1295///
1296/// the heuristics tend to push the store up, but since the
1297/// operand of the store has another use (U), this would increase
1298/// the length of that other use (the U->N edge).
1299///
1300/// This function transforms code like the above to route U's
1301/// dependence through the store when possible, like this:
1302///
1303///      N
1304///      ||
1305///      ||
1306///     store
1307///       |
1308///       U
1309///       |
1310///      ...
1311///
1312/// This results in the store being scheduled immediately
1313/// after N, which shortens the U->N live range, reducing
1314/// register pressure.
1315///
1316template<class SF>
1317void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1318  // Visit all the nodes in topological order, working top-down.
1319  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1320    SUnit *SU = &(*SUnits)[i];
1321    // For now, only look at nodes with no data successors, such as stores.
1322    // These are especially important, due to the heuristics in
1323    // getNodePriority for nodes with no data successors.
1324    if (SU->NumSuccs != 0)
1325      continue;
1326    // For now, only look at nodes with exactly one data predecessor.
1327    if (SU->NumPreds != 1)
1328      continue;
1329    // Avoid prescheduling copies to virtual registers, which don't behave
1330    // like other nodes from the perspective of scheduling heuristics.
1331    if (SDNode *N = SU->getNode())
1332      if (N->getOpcode() == ISD::CopyToReg &&
1333          TargetRegisterInfo::isVirtualRegister
1334            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1335        continue;
1336
1337    // Locate the single data predecessor.
1338    SUnit *PredSU = 0;
1339    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1340         EE = SU->Preds.end(); II != EE; ++II)
1341      if (!II->isCtrl()) {
1342        PredSU = II->getSUnit();
1343        break;
1344      }
1345    assert(PredSU);
1346
1347    // Don't rewrite edges that carry physregs, because that requires additional
1348    // support infrastructure.
1349    if (PredSU->hasPhysRegDefs)
1350      continue;
1351    // Short-circuit the case where SU is PredSU's only data successor.
1352    if (PredSU->NumSuccs == 1)
1353      continue;
1354    // Avoid prescheduling to copies from virtual registers, which don't behave
1355    // like other nodes from the perspective of scheduling heuristics.
1356    if (SDNode *N = SU->getNode())
1357      if (N->getOpcode() == ISD::CopyFromReg &&
1358          TargetRegisterInfo::isVirtualRegister
1359            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1360        continue;
1361
1362    // Perform checks on the successors of PredSU.
1363    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1364         EE = PredSU->Succs.end(); II != EE; ++II) {
1365      SUnit *PredSuccSU = II->getSUnit();
1366      if (PredSuccSU == SU) continue;
1367      // If PredSU has another successor with no data successors, for
1368      // now don't attempt to choose either over the other.
1369      if (PredSuccSU->NumSuccs == 0)
1370        goto outer_loop_continue;
1371      // Don't break physical register dependencies.
1372      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1373        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1374          goto outer_loop_continue;
1375      // Don't introduce graph cycles.
1376      if (scheduleDAG->IsReachable(SU, PredSuccSU))
1377        goto outer_loop_continue;
1378    }
1379
1380    // Ok, the transformation is safe and the heuristics suggest it is
1381    // profitable. Update the graph.
1382    DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
1383                 << " next to PredSU # " << PredSU->NodeNum
1384                 << " to guide scheduling in the presence of multiple uses\n");
1385    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1386      SDep Edge = PredSU->Succs[i];
1387      assert(!Edge.isAssignedRegDep());
1388      SUnit *SuccSU = Edge.getSUnit();
1389      if (SuccSU != SU) {
1390        Edge.setSUnit(PredSU);
1391        scheduleDAG->RemovePred(SuccSU, Edge);
1392        scheduleDAG->AddPred(SU, Edge);
1393        Edge.setSUnit(SU);
1394        scheduleDAG->AddPred(SuccSU, Edge);
1395        --i;
1396      }
1397    }
1398  outer_loop_continue:;
1399  }
1400}
1401
1402/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
1403/// it as a def&use operand, add a pseudo control edge from it to the other
1404/// node (if it won't create a cycle) so the two-address one will be scheduled
1405/// first (lower in the schedule). If both nodes are two-address, favor the
1406/// one that has a CopyToReg use (more likely to be a loop induction update).
1407/// If both are two-address, but one is commutable while the other is not
1408/// commutable, favor the one that's not commutable.
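///
/// For example (hypothetical instructions):
///   a = ADD a, b    ; two-address: the def is tied to the first operand
///   c = SUB a, d    ; another use of the same value "a"
/// The artificial edge makes the SUB end up above the ADD in the final
/// schedule; otherwise the ADD would clobber "a" while the SUB still needs
/// it, forcing the register allocator to insert a copy.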
1409template<class SF>
1410void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1411  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1412    SUnit *SU = &(*SUnits)[i];
1413    if (!SU->isTwoAddress)
1414      continue;
1415
1416    SDNode *Node = SU->getNode();
1417    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
1418      continue;
1419
1420    unsigned Opc = Node->getMachineOpcode();
1421    const TargetInstrDesc &TID = TII->get(Opc);
1422    unsigned NumRes = TID.getNumDefs();
1423    unsigned NumOps = TID.getNumOperands() - NumRes;
1424    for (unsigned j = 0; j != NumOps; ++j) {
1425      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1426        continue;
1427      SDNode *DU = SU->getNode()->getOperand(j).getNode();
1428      if (DU->getNodeId() == -1)
1429        continue;
1430      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1431      if (!DUSU) continue;
1432      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1433           E = DUSU->Succs.end(); I != E; ++I) {
1434        if (I->isCtrl()) continue;
1435        SUnit *SuccSU = I->getSUnit();
1436        if (SuccSU == SU)
1437          continue;
1438        // Be conservative. Ignore if nodes aren't at roughly the same
1439        // depth and height.
1440        if (SuccSU->getHeight() < SU->getHeight() &&
1441            (SU->getHeight() - SuccSU->getHeight()) > 1)
1442          continue;
1443        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1444        // constrains whatever is using the copy, instead of the copy
1445        // itself. In the case that the copy is coalesced, this
1446        // preserves the intent of the pseudo two-address heuristics.
1447        while (SuccSU->Succs.size() == 1 &&
1448               SuccSU->getNode()->isMachineOpcode() &&
1449               SuccSU->getNode()->getMachineOpcode() ==
1450                 TargetOpcode::COPY_TO_REGCLASS)
1451          SuccSU = SuccSU->Succs.front().getSUnit();
1452        // Don't constrain non-instruction nodes.
1453        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1454          continue;
1455        // Don't constrain nodes with physical register defs if the
1456        // predecessor can clobber them.
1457        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1458          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1459            continue;
1460        }
1461        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1462        // these may be coalesced away. We want them close to their uses.
1463        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1464        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
1465            SuccOpc == TargetOpcode::INSERT_SUBREG ||
1466            SuccOpc == TargetOpcode::SUBREG_TO_REG)
1467          continue;
1468        if ((!canClobber(SuccSU, DUSU) ||
1469             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
1470             (!SU->isCommutable && SuccSU->isCommutable)) &&
1471            !scheduleDAG->IsReachable(SuccSU, SU)) {
1472          DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
1473                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
1474          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1475                                        /*Reg=*/0, /*isNormalMemory=*/false,
1476                                        /*isMustAlias=*/false,
1477                                        /*isArtificial=*/true));
1478        }
1479      }
1480    }
1481  }
1482}
1483
1484/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1485/// scheduling units.
1486template<class SF>
1487void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1488  SethiUllmanNumbers.assign(SUnits->size(), 0);
1489
1490  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1491    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1492}
1493
1494/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the number of unscheduled
1495/// predecessors of the successors of the SUnit SU. Stop counting as soon as
1496/// the provided limit is exceeded.
1497static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1498                                                    unsigned Limit) {
1499  unsigned Sum = 0;
1500  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1501       I != E; ++I) {
1502    const SUnit *SuccSU = I->getSUnit();
1503    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1504         EE = SuccSU->Preds.end(); II != EE; ++II) {
1505      SUnit *PredSU = II->getSUnit();
1506      if (!PredSU->isScheduled)
1507        if (++Sum > Limit)
1508          return Sum;
1509    }
1510  }
1511  return Sum;
1512}
1513
1514
1515// Top down
1516bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1517  unsigned LPriority = SPQ->getNodePriority(left);
1518  unsigned RPriority = SPQ->getNodePriority(right);
1519  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1520  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
1521  bool LIsFloater = LIsTarget && left->NumPreds == 0;
1522  bool RIsFloater = RIsTarget && right->NumPreds == 0;
1523  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1524  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1525
1526  if (left->NumSuccs == 0 && right->NumSuccs != 0)
1527    return false;
1528  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1529    return true;
1530
1531  if (LIsFloater)
1532    LBonus -= 2;
1533  if (RIsFloater)
1534    RBonus -= 2;
1535  if (left->NumSuccs == 1)
1536    LBonus += 2;
1537  if (right->NumSuccs == 1)
1538    RBonus += 2;
1539
1540  if (LPriority+LBonus != RPriority+RBonus)
1541    return LPriority+LBonus < RPriority+RBonus;
1542
1543  if (left->getDepth() != right->getDepth())
1544    return left->getDepth() < right->getDepth();
1545
1546  if (left->NumSuccsLeft != right->NumSuccsLeft)
1547    return left->NumSuccsLeft > right->NumSuccsLeft;
1548
1549  assert(left->NodeQueueId && right->NodeQueueId &&
1550         "NodeQueueId cannot be zero");
1551  return (left->NodeQueueId > right->NodeQueueId);
1552}
1553
1554//===----------------------------------------------------------------------===//
1555//                         Public Constructor Functions
1556//===----------------------------------------------------------------------===//
1557
1558llvm::ScheduleDAGSDNodes *
1559llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1560  const TargetMachine &TM = IS->TM;
1561  const TargetInstrInfo *TII = TM.getInstrInfo();
1562  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1563
1564  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
1565
1566  ScheduleDAGRRList *SD =
1567    new ScheduleDAGRRList(*IS->MF, true, PQ);
1568  PQ->setScheduleDAG(SD);
1569  return SD;
1570}
1571
1572llvm::ScheduleDAGSDNodes *
1573llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1574  const TargetMachine &TM = IS->TM;
1575  const TargetInstrInfo *TII = TM.getInstrInfo();
1576  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1577
1578  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
1579
1580  ScheduleDAGRRList *SD =
1581    new ScheduleDAGRRList(*IS->MF, false, PQ);
1582  PQ->setScheduleDAG(SD);
1583  return SD;
1584}
1585
1586llvm::ScheduleDAGSDNodes *
1587llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1588  const TargetMachine &TM = IS->TM;
1589  const TargetInstrInfo *TII = TM.getInstrInfo();
1590  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1591
1592  SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
1593
1594  ScheduleDAGRRList *SD =
1595    new ScheduleDAGRRList(*IS->MF, true, PQ);
1596  PQ->setScheduleDAG(SD);
1597  return SD;
1598}
1599