ScheduleDAGRRList.cpp revision eb577ba3b815a1fa4627b060dd2345d17abf672d
1//===----- ScheduleDAGList.cpp - Reg pressure reduction list scheduler ----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Evan Cheng and is distributed under the
6// University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "sched"
19#include "llvm/CodeGen/ScheduleDAG.h"
20#include "llvm/CodeGen/SchedulerRegistry.h"
21#include "llvm/CodeGen/SSARegMap.h"
22#include "llvm/Target/MRegisterInfo.h"
23#include "llvm/Target/TargetData.h"
24#include "llvm/Target/TargetMachine.h"
25#include "llvm/Target/TargetInstrInfo.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/Visibility.h"
28#include "llvm/ADT/Statistic.h"
29#include <climits>
30#include <iostream>
31#include <queue>
32#include "llvm/Support/CommandLine.h"
33using namespace llvm;
34
35static RegisterScheduler
36  burrListDAGScheduler("list-burr",
37                       "  Bottom-up register reduction list scheduling",
38                       createBURRListDAGScheduler);
39static RegisterScheduler
40  tdrListrDAGScheduler("list-tdrr",
41                       "  Top-down register reduction list scheduling",
42                       createTDRRListDAGScheduler);
43
44namespace {
45//===----------------------------------------------------------------------===//
46/// ScheduleDAGRRList - The actual register reduction list scheduler
47/// implementation.  This supports both top-down and bottom-up scheduling.
48///
49
50class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
51private:
52  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
53  /// it is top-down.
54  bool isBottomUp;
55
56  /// AvailableQueue - The priority queue to use for the available SUnits.
57  ///
58  SchedulingPriorityQueue *AvailableQueue;
59
60public:
61  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
62                  const TargetMachine &tm, bool isbottomup,
63                  SchedulingPriorityQueue *availqueue)
64    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
65      AvailableQueue(availqueue) {
66    }
67
68  ~ScheduleDAGRRList() {
69    delete AvailableQueue;
70  }
71
72  void Schedule();
73
74private:
75  void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle);
76  void ReleaseSucc(SUnit *SuccSU, bool isChain, unsigned CurCycle);
77  void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle);
78  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
79  void ListScheduleTopDown();
80  void ListScheduleBottomUp();
81  void CommuteNodesToReducePressure();
82};
83}  // end anonymous namespace
84
85
86/// Schedule - Schedule the DAG using list scheduling.
87void ScheduleDAGRRList::Schedule() {
88  DEBUG(std::cerr << "********** List Scheduling **********\n");
89
90  // Build scheduling units.
91  BuildSchedUnits();
92
93  CalculateDepths();
94  CalculateHeights();
95  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
96        SUnits[su].dumpAll(&DAG));
97
98  AvailableQueue->initNodes(SUnits);
99
100  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
101  if (isBottomUp)
102    ListScheduleBottomUp();
103  else
104    ListScheduleTopDown();
105
106  AvailableQueue->releaseState();
107
108  CommuteNodesToReducePressure();
109
110  DEBUG(std::cerr << "*** Final schedule ***\n");
111  DEBUG(dumpSchedule());
112  DEBUG(std::cerr << "\n");
113
114  // Emit in scheduled order
115  EmitSchedule();
116}
117
118/// CommuteNodesToReducePressure - Is a node is two-address and commutable, and
119/// it is not the last use of its first operand, add it to the CommuteSet if
120/// possible. It will be commuted when it is translated to a MI.
121void ScheduleDAGRRList::CommuteNodesToReducePressure() {
122  std::set<SUnit *> OperandSeen;
123  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
124    SUnit *SU = Sequence[i];
125    if (!SU) continue;
126    if (SU->isTwoAddress && SU->isCommutable) {
127      SDNode *OpN = SU->Node->getOperand(0).Val;
128      SUnit *OpSU = SUnitMap[OpN];
129      if (OpSU && OperandSeen.count(OpSU) == 1) {
130        // Ok, so SU is not the last use of OpSU, but SU is two-address so
131        // it will clobber OpSU. Try to commute it if possible.
132        bool DoCommute = true;
133        for (unsigned j = 1, e = SU->Node->getNumOperands(); j != e; ++j) {
134          OpN = SU->Node->getOperand(j).Val;
135          OpSU = SUnitMap[OpN];
136          if (OpSU && OperandSeen.count(OpSU) == 1) {
137            DoCommute = false;
138            break;
139          }
140        }
141        if (DoCommute)
142          CommuteSet.insert(SU->Node);
143      }
144    }
145
146    for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
147           E = SU->Preds.end(); I != E; ++I) {
148      if (!I->second)
149        OperandSeen.insert(I->first);
150    }
151  }
152}
153
154//===----------------------------------------------------------------------===//
155//  Bottom-Up Scheduling
156//===----------------------------------------------------------------------===//
157
158static const TargetRegisterClass *getRegClass(SUnit *SU,
159                                              const TargetInstrInfo *TII,
160                                              const MRegisterInfo *MRI,
161                                              SSARegMap *RegMap) {
162  if (SU->Node->isTargetOpcode()) {
163    unsigned Opc = SU->Node->getTargetOpcode();
164    const TargetInstrDescriptor &II = TII->get(Opc);
165    return MRI->getRegClass(II.OpInfo->RegClass);
166  } else {
167    assert(SU->Node->getOpcode() == ISD::CopyFromReg);
168    unsigned SrcReg = cast<RegisterSDNode>(SU->Node->getOperand(1))->getReg();
169    if (MRegisterInfo::isVirtualRegister(SrcReg))
170      return RegMap->getRegClass(SrcReg);
171    else {
172      for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(),
173             E = MRI->regclass_end(); I != E; ++I)
174        if ((*I)->hasType(SU->Node->getValueType(0)) &&
175            (*I)->contains(SrcReg))
176          return *I;
177      assert(false && "Couldn't find register class for reg copy!");
178    }
179    return NULL;
180  }
181}
182
183static unsigned getNumResults(SUnit *SU) {
184  unsigned NumResults = 0;
185  for (unsigned i = 0, e = SU->Node->getNumValues(); i != e; ++i) {
186    MVT::ValueType VT = SU->Node->getValueType(i);
187    if (VT != MVT::Other && VT != MVT::Flag)
188      NumResults++;
189  }
190  return NumResults;
191}
192
193/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
194/// the Available queue is the count reaches zero. Also update its cycle bound.
195void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
196                                    unsigned CurCycle) {
197  // FIXME: the distance between two nodes is not always == the predecessor's
198  // latency. For example, the reader can very well read the register written
199  // by the predecessor later than the issue cycle. It also depends on the
200  // interrupt model (drain vs. freeze).
201  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);
202
203  if (!isChain)
204    PredSU->NumSuccsLeft--;
205  else
206    PredSU->NumChainSuccsLeft--;
207
208#ifndef NDEBUG
209  if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
210    std::cerr << "*** List scheduling failed! ***\n";
211    PredSU->dump(&DAG);
212    std::cerr << " has been released too many times!\n";
213    assert(0);
214  }
215#endif
216
217  if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
218    // EntryToken has to go last!  Special case it here.
219    if (PredSU->Node->getOpcode() != ISD::EntryToken) {
220      PredSU->isAvailable = true;
221      AvailableQueue->push(PredSU);
222    }
223  }
224}
225
226/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
227/// count of its predecessors. If a predecessor pending count is zero, add it to
228/// the Available queue.
229void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
230  DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: ");
231  DEBUG(SU->dump(&DAG));
232  SU->Cycle = CurCycle;
233
234  AvailableQueue->ScheduledNode(SU);
235  Sequence.push_back(SU);
236
237  // Bottom up: release predecessors
238  for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
239         E = SU->Preds.end(); I != E; ++I)
240    ReleasePred(I->first, I->second, CurCycle);
241  SU->isScheduled = true;
242}
243
244/// isReady - True if node's lower cycle bound is less or equal to the current
245/// scheduling cycle. Always true if all nodes have uniform latency 1.
246static inline bool isReady(SUnit *SU, unsigned CurCycle) {
247  return SU->CycleBound <= CurCycle;
248}
249
250/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
251/// schedulers.
252void ScheduleDAGRRList::ListScheduleBottomUp() {
253  unsigned CurCycle = 0;
254  // Add root to Available queue.
255  AvailableQueue->push(SUnitMap[DAG.getRoot().Val]);
256
257  // While Available queue is not empty, grab the node with the highest
258  // priority. If it is not ready put it back. Schedule the node.
259  std::vector<SUnit*> NotReady;
260  SUnit *CurNode = NULL;
261  while (!AvailableQueue->empty()) {
262    SUnit *CurNode = AvailableQueue->pop();
263    while (CurNode && !isReady(CurNode, CurCycle)) {
264      NotReady.push_back(CurNode);
265      CurNode = AvailableQueue->pop();
266    }
267
268    // Add the nodes that aren't ready back onto the available list.
269    AvailableQueue->push_all(NotReady);
270    NotReady.clear();
271
272    if (CurNode != NULL)
273      ScheduleNodeBottomUp(CurNode, CurCycle);
274    CurCycle++;
275  }
276
277  // Add entry node last
278  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
279    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
280    Sequence.push_back(Entry);
281  }
282
283  // Reverse the order if it is bottom up.
284  std::reverse(Sequence.begin(), Sequence.end());
285
286
287#ifndef NDEBUG
288  // Verify that all SUnits were scheduled.
289  bool AnyNotSched = false;
290  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
291    if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) {
292      if (!AnyNotSched)
293        std::cerr << "*** List scheduling failed! ***\n";
294      SUnits[i].dump(&DAG);
295      std::cerr << "has not been scheduled!\n";
296      AnyNotSched = true;
297    }
298  }
299  assert(!AnyNotSched);
300#endif
301}
302
303//===----------------------------------------------------------------------===//
304//  Top-Down Scheduling
305//===----------------------------------------------------------------------===//
306
307/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
308/// the PendingQueue if the count reaches zero.
309void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
310                                    unsigned CurCycle) {
311  // FIXME: the distance between two nodes is not always == the predecessor's
312  // latency. For example, the reader can very well read the register written
313  // by the predecessor later than the issue cycle. It also depends on the
314  // interrupt model (drain vs. freeze).
315  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);
316
317  if (!isChain)
318    SuccSU->NumPredsLeft--;
319  else
320    SuccSU->NumChainPredsLeft--;
321
322#ifndef NDEBUG
323  if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) {
324    std::cerr << "*** List scheduling failed! ***\n";
325    SuccSU->dump(&DAG);
326    std::cerr << " has been released too many times!\n";
327    assert(0);
328  }
329#endif
330
331  if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
332    SuccSU->isAvailable = true;
333    AvailableQueue->push(SuccSU);
334  }
335}
336
337
338/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
339/// count of its successors. If a successor pending count is zero, add it to
340/// the Available queue.
341void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
342  DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: ");
343  DEBUG(SU->dump(&DAG));
344  SU->Cycle = CurCycle;
345
346  AvailableQueue->ScheduledNode(SU);
347  Sequence.push_back(SU);
348
349  // Top down: release successors
350  for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Succs.begin(),
351         E = SU->Succs.end(); I != E; ++I)
352    ReleaseSucc(I->first, I->second, CurCycle);
353  SU->isScheduled = true;
354}
355
356void ScheduleDAGRRList::ListScheduleTopDown() {
357  unsigned CurCycle = 0;
358  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
359
360  // All leaves to Available queue.
361  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
362    // It is available if it has no predecessors.
363    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
364      AvailableQueue->push(&SUnits[i]);
365      SUnits[i].isAvailable = true;
366    }
367  }
368
369  // Emit the entry node first.
370  ScheduleNodeTopDown(Entry, CurCycle);
371  CurCycle++;
372
373  // While Available queue is not empty, grab the node with the highest
374  // priority. If it is not ready put it back. Schedule the node.
375  std::vector<SUnit*> NotReady;
376  SUnit *CurNode = NULL;
377  while (!AvailableQueue->empty()) {
378    SUnit *CurNode = AvailableQueue->pop();
379    while (CurNode && !isReady(CurNode, CurCycle)) {
380      NotReady.push_back(CurNode);
381      CurNode = AvailableQueue->pop();
382    }
383
384    // Add the nodes that aren't ready back onto the available list.
385    AvailableQueue->push_all(NotReady);
386    NotReady.clear();
387
388    if (CurNode != NULL)
389      ScheduleNodeTopDown(CurNode, CurCycle);
390    CurCycle++;
391  }
392
393
394#ifndef NDEBUG
395  // Verify that all SUnits were scheduled.
396  bool AnyNotSched = false;
397  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
398    if (!SUnits[i].isScheduled) {
399      if (!AnyNotSched)
400        std::cerr << "*** List scheduling failed! ***\n";
401      SUnits[i].dump(&DAG);
402      std::cerr << "has not been scheduled!\n";
403      AnyNotSched = true;
404    }
405  }
406  assert(!AnyNotSched);
407#endif
408}
409
410
411
412//===----------------------------------------------------------------------===//
413//                RegReductionPriorityQueue Implementation
414//===----------------------------------------------------------------------===//
415//
416// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
417// to reduce register pressure.
418//
419namespace {
420  template<class SF>
421  class RegReductionPriorityQueue;
422
423  /// Sorting functions for the Available queue.
424  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
425    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
426    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
427    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
428
429    bool operator()(const SUnit* left, const SUnit* right) const;
430  };
431
432  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
433    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
434    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
435    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
436
437    bool operator()(const SUnit* left, const SUnit* right) const;
438  };
439}  // end anonymous namespace
440
441namespace {
442  template<class SF>
443  class VISIBILITY_HIDDEN RegReductionPriorityQueue
444   : public SchedulingPriorityQueue {
445    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;
446
447  public:
448    RegReductionPriorityQueue() :
449    Queue(SF(this)) {}
450
451    virtual void initNodes(const std::vector<SUnit> &sunits) {}
452    virtual void releaseState() {}
453
454    virtual int getSethiUllmanNumber(unsigned NodeNum) const {
455      return 0;
456    }
457
458    bool empty() const { return Queue.empty(); }
459
460    void push(SUnit *U) {
461      Queue.push(U);
462    }
463    void push_all(const std::vector<SUnit *> &Nodes) {
464      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
465        Queue.push(Nodes[i]);
466    }
467
468    SUnit *pop() {
469      if (empty()) return NULL;
470      SUnit *V = Queue.top();
471      Queue.pop();
472      return V;
473    }
474  };
475
476  template<class SF>
477  class VISIBILITY_HIDDEN BURegReductionPriorityQueue
478   : public RegReductionPriorityQueue<SF> {
479    // SUnits - The SUnits for the current graph.
480    const std::vector<SUnit> *SUnits;
481
482    // SethiUllmanNumbers - The SethiUllman number for each node.
483    std::vector<int> SethiUllmanNumbers;
484
485  public:
486    BURegReductionPriorityQueue() {}
487
488    void initNodes(const std::vector<SUnit> &sunits) {
489      SUnits = &sunits;
490      // Add pseudo dependency edges for two-address nodes.
491      AddPseudoTwoAddrDeps();
492      // Calculate node priorities.
493      CalculatePriorities();
494    }
495
496    void releaseState() {
497      SUnits = 0;
498      SethiUllmanNumbers.clear();
499    }
500
501    int getSethiUllmanNumber(unsigned NodeNum) const {
502      assert(NodeNum < SethiUllmanNumbers.size());
503      return SethiUllmanNumbers[NodeNum];
504    }
505
506  private:
507    void AddPseudoTwoAddrDeps();
508    void CalculatePriorities();
509    int CalcNodePriority(const SUnit *SU);
510  };
511
512
513  template<class SF>
514  class TDRegReductionPriorityQueue : public RegReductionPriorityQueue<SF> {
515    // SUnits - The SUnits for the current graph.
516    const std::vector<SUnit> *SUnits;
517
518    // SethiUllmanNumbers - The SethiUllman number for each node.
519    std::vector<int> SethiUllmanNumbers;
520
521  public:
522    TDRegReductionPriorityQueue() {}
523
524    void initNodes(const std::vector<SUnit> &sunits) {
525      SUnits = &sunits;
526      // Calculate node priorities.
527      CalculatePriorities();
528    }
529
530    void releaseState() {
531      SUnits = 0;
532      SethiUllmanNumbers.clear();
533    }
534
535    int getSethiUllmanNumber(unsigned NodeNum) const {
536      assert(NodeNum < SethiUllmanNumbers.size());
537      return SethiUllmanNumbers[NodeNum];
538    }
539
540  private:
541    void CalculatePriorities();
542    int CalcNodePriority(const SUnit *SU);
543  };
544}
545
546static bool isFloater(const SUnit *SU) {
547  if (SU->Node->isTargetOpcode()) {
548    if (SU->NumPreds == 0)
549      return true;
550    if (SU->NumPreds == 1) {
551      for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
552             E = SU->Preds.end(); I != E; ++I) {
553        if (I->second) continue;
554
555        SUnit *PredSU = I->first;
556        unsigned Opc = PredSU->Node->getOpcode();
557        if (Opc != ISD::EntryToken && Opc != ISD::TokenFactor &&
558            Opc != ISD::CopyFromReg && Opc != ISD::CopyToReg)
559          return false;
560      }
561      return true;
562    }
563  }
564  return false;
565}
566
567static bool isSimpleFloaterUse(const SUnit *SU) {
568  unsigned NumOps = 0;
569  for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
570         E = SU->Preds.end(); I != E; ++I) {
571    if (I->second) continue;
572    if (++NumOps > 1)
573      return false;
574    if (!isFloater(I->first))
575      return false;
576  }
577  return true;
578}
579
580// Bottom up
581bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
582  unsigned LeftNum  = left->NodeNum;
583  unsigned RightNum = right->NodeNum;
584  bool LIsTarget = left->Node->isTargetOpcode();
585  bool RIsTarget = right->Node->isTargetOpcode();
586  int LPriority = SPQ->getSethiUllmanNumber(LeftNum);
587  int RPriority = SPQ->getSethiUllmanNumber(RightNum);
588  int LBonus = 0;
589  int RBonus = 0;
590
591  // Schedule floaters (e.g. load from some constant address) and those nodes
592  // with a single predecessor each first. They maintain / reduce register
593  // pressure.
594  if (isFloater(left) || isSimpleFloaterUse(left))
595    LBonus += 2;
596  if (isFloater(right) || isSimpleFloaterUse(right))
597    RBonus += 2;
598
599  // Special tie breaker: if two nodes share a operand, the one that use it
600  // as a def&use operand is preferred.
601  if (LIsTarget && RIsTarget) {
602    if (left->isTwoAddress && !right->isTwoAddress) {
603      SDNode *DUNode = left->Node->getOperand(0).Val;
604      if (DUNode->isOperand(right->Node))
605        LBonus += 2;
606    }
607    if (!left->isTwoAddress && right->isTwoAddress) {
608      SDNode *DUNode = right->Node->getOperand(0).Val;
609      if (DUNode->isOperand(left->Node))
610        RBonus += 2;
611    }
612  }
613
614  if (LPriority+LBonus < RPriority+RBonus)
615    return true;
616  else if (LPriority+LBonus == RPriority+RBonus)
617    if (left->Height > right->Height)
618      return true;
619    else if (left->Height == right->Height)
620      if (left->Depth < right->Depth)
621        return true;
622      else if (left->Depth == right->Depth)
623        if (left->CycleBound > right->CycleBound)
624          return true;
625  return false;
626}
627
628static inline bool isCopyFromLiveIn(const SUnit *SU) {
629  SDNode *N = SU->Node;
630  return N->getOpcode() == ISD::CopyFromReg &&
631    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
632}
633
634// FIXME: This is probably too slow!
635static void isReachable(SUnit *SU, SUnit *TargetSU,
636                        std::set<SUnit *> &Visited, bool &Reached) {
637  if (Reached) return;
638  if (SU == TargetSU) {
639    Reached = true;
640    return;
641  }
642  if (!Visited.insert(SU).second) return;
643
644  for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
645         E = SU->Preds.end(); I != E; ++I)
646    isReachable(I->first, TargetSU, Visited, Reached);
647}
648
649static bool isReachable(SUnit *SU, SUnit *TargetSU) {
650  std::set<SUnit *> Visited;
651  bool Reached = false;
652  isReachable(SU, TargetSU, Visited, Reached);
653  return Reached;
654}
655
656static SUnit *getDefUsePredecessor(SUnit *SU) {
657  SDNode *DU = SU->Node->getOperand(0).Val;
658  for (std::set<std::pair<SUnit*, bool> >::iterator
659         I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) {
660    if (I->second) continue;  // ignore chain preds
661    SUnit *PredSU = I->first;
662    if (PredSU->Node == DU)
663      return PredSU;
664  }
665
666  // Must be flagged.
667  return NULL;
668}
669
670static bool canClobber(SUnit *SU, SUnit *Op) {
671  if (SU->isTwoAddress)
672    return Op == getDefUsePredecessor(SU);
673  return false;
674}
675
676/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
677/// it as a def&use operand. Add a pseudo control edge from it to the other
678/// node (if it won't create a cycle) so the two-address one will be scheduled
679/// first (lower in the schedule).
680template<class SF>
681void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
682  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
683    SUnit *SU = (SUnit *)&((*SUnits)[i]);
684    SDNode *Node = SU->Node;
685    if (!Node->isTargetOpcode())
686      continue;
687
688    if (SU->isTwoAddress) {
689      SUnit *DUSU = getDefUsePredecessor(SU);
690      if (!DUSU) continue;
691
692      for (std::set<std::pair<SUnit*, bool> >::iterator I = DUSU->Succs.begin(),
693             E = DUSU->Succs.end(); I != E; ++I) {
694        if (I->second) continue;
695        SUnit *SuccSU = I->first;
696        if (SuccSU != SU &&
697            (!canClobber(SuccSU, DUSU) ||
698             (!SU->isCommutable && SuccSU->isCommutable))){
699          if (SuccSU->Depth == SU->Depth && !isReachable(SuccSU, SU)) {
700            DEBUG(std::cerr << "Adding an edge from SU # " << SU->NodeNum
701                  << " to SU #" << SuccSU->NodeNum << "\n");
702            if (SU->Preds.insert(std::make_pair(SuccSU, true)).second)
703              SU->NumChainPredsLeft++;
704            if (SuccSU->Succs.insert(std::make_pair(SU, true)).second)
705              SuccSU->NumChainSuccsLeft++;
706          }
707        }
708      }
709    }
710  }
711}
712
713/// CalcNodePriority - Priority is the Sethi Ullman number.
714/// Smaller number is the higher priority.
715template<class SF>
716int BURegReductionPriorityQueue<SF>::CalcNodePriority(const SUnit *SU) {
717  int &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
718  if (SethiUllmanNumber != 0)
719    return SethiUllmanNumber;
720
721  unsigned Opc = SU->Node->getOpcode();
722  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
723    SethiUllmanNumber = INT_MAX - 10;
724  else if (SU->NumSuccsLeft == 0)
725    // If SU does not have a use, i.e. it doesn't produce a value that would
726    // be consumed (e.g. store), then it terminates a chain of computation.
727    // Give it a small SethiUllman number so it will be scheduled right before its
728    // predecessors that it doesn't lengthen their live ranges.
729    SethiUllmanNumber = INT_MIN + 10;
730  // FIXME: remove this else if? It seems to reduce register spills but often
731  // ends up increasing runtime. Need to investigate.
732  else if (SU->NumPredsLeft == 0 &&
733           (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
734    SethiUllmanNumber = INT_MAX - 10;
735  else {
736    int Extra = 0;
737    for (std::set<std::pair<SUnit*, bool> >::const_iterator
738         I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) {
739      if (I->second) continue;  // ignore chain preds
740      SUnit *PredSU = I->first;
741      int PredSethiUllman = CalcNodePriority(PredSU);
742      if (PredSethiUllman > SethiUllmanNumber) {
743        SethiUllmanNumber = PredSethiUllman;
744        Extra = 0;
745      } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
746        Extra++;
747    }
748
749    SethiUllmanNumber += Extra;
750  }
751
752  return SethiUllmanNumber;
753}
754
755/// CalculatePriorities - Calculate priorities of all scheduling units.
756template<class SF>
757void BURegReductionPriorityQueue<SF>::CalculatePriorities() {
758  SethiUllmanNumbers.assign(SUnits->size(), 0);
759
760  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
761    CalcNodePriority(&(*SUnits)[i]);
762}
763
764static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
765  unsigned Sum = 0;
766  for (std::set<std::pair<SUnit*, bool> >::const_iterator
767         I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) {
768    SUnit *SuccSU = I->first;
769    for (std::set<std::pair<SUnit*, bool> >::const_iterator
770         II = SuccSU->Preds.begin(), EE = SuccSU->Preds.end(); II != EE; ++II) {
771      SUnit *PredSU = II->first;
772      if (!PredSU->isScheduled)
773        Sum++;
774    }
775  }
776
777  return Sum;
778}
779
780
781// Top down
782bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
783  unsigned LeftNum  = left->NodeNum;
784  unsigned RightNum = right->NodeNum;
785  int LPriority = SPQ->getSethiUllmanNumber(LeftNum);
786  int RPriority = SPQ->getSethiUllmanNumber(RightNum);
787  bool LIsTarget = left->Node->isTargetOpcode();
788  bool RIsTarget = right->Node->isTargetOpcode();
789  bool LIsFloater = LIsTarget && left->NumPreds == 0;
790  bool RIsFloater = RIsTarget && right->NumPreds == 0;
791  unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0;
792  unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0;
793
794  if (left->NumSuccs == 0 && right->NumSuccs != 0)
795    return false;
796  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
797    return true;
798
799  // Special tie breaker: if two nodes share a operand, the one that use it
800  // as a def&use operand is preferred.
801  if (LIsTarget && RIsTarget) {
802    if (left->isTwoAddress && !right->isTwoAddress) {
803      SDNode *DUNode = left->Node->getOperand(0).Val;
804      if (DUNode->isOperand(right->Node))
805        RBonus += 2;
806    }
807    if (!left->isTwoAddress && right->isTwoAddress) {
808      SDNode *DUNode = right->Node->getOperand(0).Val;
809      if (DUNode->isOperand(left->Node))
810        LBonus += 2;
811    }
812  }
813  if (LIsFloater)
814    LBonus -= 2;
815  if (RIsFloater)
816    RBonus -= 2;
817  if (left->NumSuccs == 1)
818    LBonus += 2;
819  if (right->NumSuccs == 1)
820    RBonus += 2;
821
822  if (LPriority+LBonus < RPriority+RBonus)
823    return true;
824  else if (LPriority == RPriority)
825    if (left->Depth < right->Depth)
826      return true;
827    else if (left->Depth == right->Depth)
828      if (left->NumSuccsLeft > right->NumSuccsLeft)
829        return true;
830      else if (left->NumSuccsLeft == right->NumSuccsLeft)
831        if (left->CycleBound > right->CycleBound)
832          return true;
833  return false;
834}
835
836/// CalcNodePriority - Priority is the Sethi Ullman number.
837/// Smaller number is the higher priority.
838template<class SF>
839int TDRegReductionPriorityQueue<SF>::CalcNodePriority(const SUnit *SU) {
840  int &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
841  if (SethiUllmanNumber != 0)
842    return SethiUllmanNumber;
843
844  unsigned Opc = SU->Node->getOpcode();
845  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
846    SethiUllmanNumber = INT_MAX - 10;
847  else if (SU->NumSuccsLeft == 0)
848    // If SU does not have a use, i.e. it doesn't produce a value that would
849    // be consumed (e.g. store), then it terminates a chain of computation.
850    // Give it a small SethiUllman number so it will be scheduled right before its
851    // predecessors that it doesn't lengthen their live ranges.
852    SethiUllmanNumber = INT_MIN + 10;
853  else if (SU->NumPredsLeft == 0 &&
854           (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
855    SethiUllmanNumber = 1;
856  else {
857    int Extra = 0;
858    for (std::set<std::pair<SUnit*, bool> >::const_iterator
859         I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) {
860      if (I->second) continue;  // ignore chain preds
861      SUnit *PredSU = I->first;
862      int PredSethiUllman = CalcNodePriority(PredSU);
863      if (PredSethiUllman > SethiUllmanNumber) {
864        SethiUllmanNumber = PredSethiUllman;
865        Extra = 0;
866      } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
867        Extra++;
868    }
869
870    SethiUllmanNumber += Extra;
871  }
872
873  return SethiUllmanNumber;
874}
875
876/// CalculatePriorities - Calculate priorities of all scheduling units.
877template<class SF>
878void TDRegReductionPriorityQueue<SF>::CalculatePriorities() {
879  SethiUllmanNumbers.assign(SUnits->size(), 0);
880
881  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
882    CalcNodePriority(&(*SUnits)[i]);
883}
884
885//===----------------------------------------------------------------------===//
886//                         Public Constructor Functions
887//===----------------------------------------------------------------------===//
888
889llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
890                                                    SelectionDAG *DAG,
891                                                    MachineBasicBlock *BB) {
892  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true,
893                               new BURegReductionPriorityQueue<bu_ls_rr_sort>());
894}
895
896llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
897                                                    SelectionDAG *DAG,
898                                                    MachineBasicBlock *BB) {
899  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false,
900                               new TDRegReductionPriorityQueue<td_ls_rr_sort>());
901}
902
903