ScheduleDAGRRList.cpp revision c6deb3d44707de57e82e16642ab845bc8b9e9e01
1//===----- ScheduleDAGList.cpp - Reg pressure reduction list scheduler ----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Evan Cheng and is distributed under the
6// University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "sched"
19#include "llvm/CodeGen/ScheduleDAG.h"
20#include "llvm/CodeGen/SchedulerRegistry.h"
21#include "llvm/CodeGen/SSARegMap.h"
22#include "llvm/Target/MRegisterInfo.h"
23#include "llvm/Target/TargetData.h"
24#include "llvm/Target/TargetMachine.h"
25#include "llvm/Target/TargetInstrInfo.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/Compiler.h"
28#include "llvm/ADT/Statistic.h"
29#include <climits>
30#include <queue>
31#include "llvm/Support/CommandLine.h"
32using namespace llvm;
33
34static RegisterScheduler
35  burrListDAGScheduler("list-burr",
36                       "  Bottom-up register reduction list scheduling",
37                       createBURRListDAGScheduler);
38static RegisterScheduler
39  tdrListrDAGScheduler("list-tdrr",
40                       "  Top-down register reduction list scheduling",
41                       createTDRRListDAGScheduler);
42
43namespace {
44//===----------------------------------------------------------------------===//
45/// ScheduleDAGRRList - The actual register reduction list scheduler
46/// implementation.  This supports both top-down and bottom-up scheduling.
47///
48
49class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
50private:
51  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
52  /// it is top-down.
53  bool isBottomUp;
54
55  /// AvailableQueue - The priority queue to use for the available SUnits.
56  ///
57  SchedulingPriorityQueue *AvailableQueue;
58
59public:
60  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
61                  const TargetMachine &tm, bool isbottomup,
62                  SchedulingPriorityQueue *availqueue)
63    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
64      AvailableQueue(availqueue) {
65    }
66
67  ~ScheduleDAGRRList() {
68    delete AvailableQueue;
69  }
70
71  void Schedule();
72
73private:
74  void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle);
75  void ReleaseSucc(SUnit *SuccSU, bool isChain, unsigned CurCycle);
76  void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle);
77  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
78  void ListScheduleTopDown();
79  void ListScheduleBottomUp();
80  void CommuteNodesToReducePressure();
81};
82}  // end anonymous namespace
83
84
85/// Schedule - Schedule the DAG using list scheduling.
86void ScheduleDAGRRList::Schedule() {
87  DOUT << "********** List Scheduling **********\n";
88
89  // Build scheduling units.
90  BuildSchedUnits();
91
92  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
93          SUnits[su].dumpAll(&DAG));
94  CalculateDepths();
95  CalculateHeights();
96
97  AvailableQueue->initNodes(SUnitMap, SUnits);
98
99  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
100  if (isBottomUp)
101    ListScheduleBottomUp();
102  else
103    ListScheduleTopDown();
104
105  AvailableQueue->releaseState();
106
107  CommuteNodesToReducePressure();
108
109  DOUT << "*** Final schedule ***\n";
110  DEBUG(dumpSchedule());
111  DOUT << "\n";
112
113  // Emit in scheduled order
114  EmitSchedule();
115}
116
117/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
118/// it is not the last use of its first operand, add it to the CommuteSet if
119/// possible. It will be commuted when it is translated to a MI.
120void ScheduleDAGRRList::CommuteNodesToReducePressure() {
121  std::set<SUnit *> OperandSeen;
122  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
123    SUnit *SU = Sequence[i];
124    if (!SU) continue;
125    if (SU->isCommutable) {
126      unsigned Opc = SU->Node->getTargetOpcode();
127      unsigned NumRes = CountResults(SU->Node);
128      unsigned NumOps = CountOperands(SU->Node);
129      for (unsigned j = 0; j != NumOps; ++j) {
130        if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1)
131          continue;
132
133        SDNode *OpN = SU->Node->getOperand(j).Val;
134        SUnit *OpSU = SUnitMap[OpN];
135        if (OpSU && OperandSeen.count(OpSU) == 1) {
136          // Ok, so SU is not the last use of OpSU, but SU is two-address so
137          // it will clobber OpSU. Try to commute SU if no other source operands
138          // are live below.
139          bool DoCommute = true;
140          for (unsigned k = 0; k < NumOps; ++k) {
141            if (k != j) {
142              OpN = SU->Node->getOperand(k).Val;
143              OpSU = SUnitMap[OpN];
144              if (OpSU && OperandSeen.count(OpSU) == 1) {
145                DoCommute = false;
146                break;
147              }
148            }
149          }
150          if (DoCommute)
151            CommuteSet.insert(SU->Node);
152        }
153
154        // Only look at the first use&def node for now.
155        break;
156      }
157    }
158
159    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
160         I != E; ++I) {
161      if (!I->second)
162        OperandSeen.insert(I->first);
163    }
164  }
165}
166
167//===----------------------------------------------------------------------===//
168//  Bottom-Up Scheduling
169//===----------------------------------------------------------------------===//
170
171/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
172/// the Available queue is the count reaches zero. Also update its cycle bound.
173void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
174                                    unsigned CurCycle) {
175  // FIXME: the distance between two nodes is not always == the predecessor's
176  // latency. For example, the reader can very well read the register written
177  // by the predecessor later than the issue cycle. It also depends on the
178  // interrupt model (drain vs. freeze).
179  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);
180
181  if (!isChain)
182    PredSU->NumSuccsLeft--;
183  else
184    PredSU->NumChainSuccsLeft--;
185
186#ifndef NDEBUG
187  if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
188    cerr << "*** List scheduling failed! ***\n";
189    PredSU->dump(&DAG);
190    cerr << " has been released too many times!\n";
191    assert(0);
192  }
193#endif
194
195  if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
196    // EntryToken has to go last!  Special case it here.
197    if (PredSU->Node->getOpcode() != ISD::EntryToken) {
198      PredSU->isAvailable = true;
199      AvailableQueue->push(PredSU);
200    }
201  }
202}
203
204/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
205/// count of its predecessors. If a predecessor pending count is zero, add it to
206/// the Available queue.
207void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
208  DOUT << "*** Scheduling [" << CurCycle << "]: ";
209  DEBUG(SU->dump(&DAG));
210  SU->Cycle = CurCycle;
211
212  AvailableQueue->ScheduledNode(SU);
213  Sequence.push_back(SU);
214
215  // Bottom up: release predecessors
216  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
217       I != E; ++I)
218    ReleasePred(I->first, I->second, CurCycle);
219  SU->isScheduled = true;
220}
221
222/// isReady - True if node's lower cycle bound is less or equal to the current
223/// scheduling cycle. Always true if all nodes have uniform latency 1.
224static inline bool isReady(SUnit *SU, unsigned CurCycle) {
225  return SU->CycleBound <= CurCycle;
226}
227
228/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
229/// schedulers.
230void ScheduleDAGRRList::ListScheduleBottomUp() {
231  unsigned CurCycle = 0;
232  // Add root to Available queue.
233  AvailableQueue->push(SUnitMap[DAG.getRoot().Val]);
234
235  // While Available queue is not empty, grab the node with the highest
236  // priority. If it is not ready put it back. Schedule the node.
237  std::vector<SUnit*> NotReady;
238  while (!AvailableQueue->empty()) {
239    SUnit *CurNode = AvailableQueue->pop();
240    while (CurNode && !isReady(CurNode, CurCycle)) {
241      NotReady.push_back(CurNode);
242      CurNode = AvailableQueue->pop();
243    }
244
245    // Add the nodes that aren't ready back onto the available list.
246    AvailableQueue->push_all(NotReady);
247    NotReady.clear();
248
249    if (CurNode != NULL)
250      ScheduleNodeBottomUp(CurNode, CurCycle);
251    CurCycle++;
252  }
253
254  // Add entry node last
255  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
256    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
257    Sequence.push_back(Entry);
258  }
259
260  // Reverse the order if it is bottom up.
261  std::reverse(Sequence.begin(), Sequence.end());
262
263
264#ifndef NDEBUG
265  // Verify that all SUnits were scheduled.
266  bool AnyNotSched = false;
267  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
268    if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) {
269      if (!AnyNotSched)
270        cerr << "*** List scheduling failed! ***\n";
271      SUnits[i].dump(&DAG);
272      cerr << "has not been scheduled!\n";
273      AnyNotSched = true;
274    }
275  }
276  assert(!AnyNotSched);
277#endif
278}
279
280//===----------------------------------------------------------------------===//
281//  Top-Down Scheduling
282//===----------------------------------------------------------------------===//
283
284/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
285/// the PendingQueue if the count reaches zero.
286void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
287                                    unsigned CurCycle) {
288  // FIXME: the distance between two nodes is not always == the predecessor's
289  // latency. For example, the reader can very well read the register written
290  // by the predecessor later than the issue cycle. It also depends on the
291  // interrupt model (drain vs. freeze).
292  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);
293
294  if (!isChain)
295    SuccSU->NumPredsLeft--;
296  else
297    SuccSU->NumChainPredsLeft--;
298
299#ifndef NDEBUG
300  if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) {
301    cerr << "*** List scheduling failed! ***\n";
302    SuccSU->dump(&DAG);
303    cerr << " has been released too many times!\n";
304    assert(0);
305  }
306#endif
307
308  if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
309    SuccSU->isAvailable = true;
310    AvailableQueue->push(SuccSU);
311  }
312}
313
314
315/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
316/// count of its successors. If a successor pending count is zero, add it to
317/// the Available queue.
318void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
319  DOUT << "*** Scheduling [" << CurCycle << "]: ";
320  DEBUG(SU->dump(&DAG));
321  SU->Cycle = CurCycle;
322
323  AvailableQueue->ScheduledNode(SU);
324  Sequence.push_back(SU);
325
326  // Top down: release successors
327  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
328       I != E; ++I)
329    ReleaseSucc(I->first, I->second, CurCycle);
330  SU->isScheduled = true;
331}
332
333void ScheduleDAGRRList::ListScheduleTopDown() {
334  unsigned CurCycle = 0;
335  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
336
337  // All leaves to Available queue.
338  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
339    // It is available if it has no predecessors.
340    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
341      AvailableQueue->push(&SUnits[i]);
342      SUnits[i].isAvailable = true;
343    }
344  }
345
346  // Emit the entry node first.
347  ScheduleNodeTopDown(Entry, CurCycle);
348  CurCycle++;
349
350  // While Available queue is not empty, grab the node with the highest
351  // priority. If it is not ready put it back. Schedule the node.
352  std::vector<SUnit*> NotReady;
353  while (!AvailableQueue->empty()) {
354    SUnit *CurNode = AvailableQueue->pop();
355    while (CurNode && !isReady(CurNode, CurCycle)) {
356      NotReady.push_back(CurNode);
357      CurNode = AvailableQueue->pop();
358    }
359
360    // Add the nodes that aren't ready back onto the available list.
361    AvailableQueue->push_all(NotReady);
362    NotReady.clear();
363
364    if (CurNode != NULL)
365      ScheduleNodeTopDown(CurNode, CurCycle);
366    CurCycle++;
367  }
368
369
370#ifndef NDEBUG
371  // Verify that all SUnits were scheduled.
372  bool AnyNotSched = false;
373  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
374    if (!SUnits[i].isScheduled) {
375      if (!AnyNotSched)
376        cerr << "*** List scheduling failed! ***\n";
377      SUnits[i].dump(&DAG);
378      cerr << "has not been scheduled!\n";
379      AnyNotSched = true;
380    }
381  }
382  assert(!AnyNotSched);
383#endif
384}
385
386
387
388//===----------------------------------------------------------------------===//
389//                RegReductionPriorityQueue Implementation
390//===----------------------------------------------------------------------===//
391//
392// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
393// to reduce register pressure.
394//
395namespace {
396  template<class SF>
397  class RegReductionPriorityQueue;
398
399  /// Sorting functions for the Available queue.
400  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
401    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
402    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
403    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
404
405    bool operator()(const SUnit* left, const SUnit* right) const;
406  };
407
408  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
409    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
410    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
411    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
412
413    bool operator()(const SUnit* left, const SUnit* right) const;
414  };
415}  // end anonymous namespace
416
417static inline bool isCopyFromLiveIn(const SUnit *SU) {
418  SDNode *N = SU->Node;
419  return N->getOpcode() == ISD::CopyFromReg &&
420    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
421}
422
423namespace {
424  template<class SF>
425  class VISIBILITY_HIDDEN RegReductionPriorityQueue
426   : public SchedulingPriorityQueue {
427    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;
428
429  public:
430    RegReductionPriorityQueue() :
431    Queue(SF(this)) {}
432
433    virtual void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
434                           std::vector<SUnit> &sunits) {}
435    virtual void releaseState() {}
436
437    virtual unsigned getNodePriority(const SUnit *SU) const {
438      return 0;
439    }
440
441    bool empty() const { return Queue.empty(); }
442
443    void push(SUnit *U) {
444      Queue.push(U);
445    }
446    void push_all(const std::vector<SUnit *> &Nodes) {
447      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
448        Queue.push(Nodes[i]);
449    }
450
451    SUnit *pop() {
452      if (empty()) return NULL;
453      SUnit *V = Queue.top();
454      Queue.pop();
455      return V;
456    }
457
458    virtual bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
459      return false;
460    }
461  };
462
463  template<class SF>
464  class VISIBILITY_HIDDEN BURegReductionPriorityQueue
465   : public RegReductionPriorityQueue<SF> {
466    // SUnitMap SDNode to SUnit mapping (n -> 1).
467    DenseMap<SDNode*, SUnit*> *SUnitMap;
468
469    // SUnits - The SUnits for the current graph.
470    const std::vector<SUnit> *SUnits;
471
472    // SethiUllmanNumbers - The SethiUllman number for each node.
473    std::vector<unsigned> SethiUllmanNumbers;
474
475    const TargetInstrInfo *TII;
476  public:
477    BURegReductionPriorityQueue(const TargetInstrInfo *tii)
478      : TII(tii) {}
479
480    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
481                   std::vector<SUnit> &sunits) {
482      SUnitMap = &sumap;
483      SUnits = &sunits;
484      // Add pseudo dependency edges for two-address nodes.
485      AddPseudoTwoAddrDeps();
486      // Calculate node priorities.
487      CalculateSethiUllmanNumbers();
488    }
489
490    void releaseState() {
491      SUnits = 0;
492      SethiUllmanNumbers.clear();
493    }
494
495    unsigned getNodePriority(const SUnit *SU) const {
496      assert(SU->NodeNum < SethiUllmanNumbers.size());
497      unsigned Opc = SU->Node->getOpcode();
498      if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
499        // CopyFromReg should be close to its def because it restricts
500        // allocation choices. But if it is a livein then perhaps we want it
501        // closer to its uses so it can be coalesced.
502        return 0xffff;
503      else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
504        // CopyToReg should be close to its uses to facilitate coalescing and
505        // avoid spilling.
506        return 0;
507      else if (SU->NumSuccs == 0)
508        // If SU does not have a use, i.e. it doesn't produce a value that would
509        // be consumed (e.g. store), then it terminates a chain of computation.
510        // Give it a large SethiUllman number so it will be scheduled right
511        // before its predecessors that it doesn't lengthen their live ranges.
512        return 0xffff;
513      else if (SU->NumPreds == 0)
514        // If SU does not have a def, schedule it close to its uses because it
515        // does not lengthen any live ranges.
516        return 0;
517      else
518        return SethiUllmanNumbers[SU->NodeNum];
519    }
520
521    bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
522      unsigned Opc = SU1->Node->getTargetOpcode();
523      unsigned NumRes = ScheduleDAG::CountResults(SU1->Node);
524      unsigned NumOps = ScheduleDAG::CountOperands(SU1->Node);
525      for (unsigned i = 0; i != NumOps; ++i) {
526        if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) == -1)
527          continue;
528        if (SU1->Node->getOperand(i).isOperand(SU2->Node))
529          return true;
530      }
531      return false;
532    }
533  private:
534    bool canClobber(SUnit *SU, SUnit *Op);
535    void AddPseudoTwoAddrDeps();
536    void CalculateSethiUllmanNumbers();
537    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
538  };
539
540
541  template<class SF>
542  class TDRegReductionPriorityQueue : public RegReductionPriorityQueue<SF> {
543    // SUnitMap SDNode to SUnit mapping (n -> 1).
544    DenseMap<SDNode*, SUnit*> *SUnitMap;
545
546    // SUnits - The SUnits for the current graph.
547    const std::vector<SUnit> *SUnits;
548
549    // SethiUllmanNumbers - The SethiUllman number for each node.
550    std::vector<unsigned> SethiUllmanNumbers;
551
552  public:
553    TDRegReductionPriorityQueue() {}
554
555    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
556                   std::vector<SUnit> &sunits) {
557      SUnitMap = &sumap;
558      SUnits = &sunits;
559      // Calculate node priorities.
560      CalculateSethiUllmanNumbers();
561    }
562
563    void releaseState() {
564      SUnits = 0;
565      SethiUllmanNumbers.clear();
566    }
567
568    unsigned getNodePriority(const SUnit *SU) const {
569      assert(SU->NodeNum < SethiUllmanNumbers.size());
570      return SethiUllmanNumbers[SU->NodeNum];
571    }
572
573  private:
574    void CalculateSethiUllmanNumbers();
575    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
576  };
577}
578
579/// closestSucc - Returns the scheduled cycle of the successor which is
580/// closet to the current cycle.
581static unsigned closestSucc(const SUnit *SU) {
582  unsigned MaxCycle = 0;
583  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
584       I != E; ++I) {
585    unsigned Cycle = I->first->Cycle;
586    // If there are bunch of CopyToRegs stacked up, they should be considered
587    // to be at the same position.
588    if (I->first->Node->getOpcode() == ISD::CopyToReg)
589      Cycle = closestSucc(I->first)+1;
590    if (Cycle > MaxCycle)
591      MaxCycle = Cycle;
592  }
593  return MaxCycle;
594}
595
596/// calcMaxScratches - Returns an cost estimate of the worse case requirement
597/// for scratch registers. Live-in operands and live-out results don't count
598/// since they are "fixed".
599static unsigned calcMaxScratches(const SUnit *SU) {
600  unsigned Scratches = 0;
601  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
602       I != E; ++I) {
603    if (I->second) continue;  // ignore chain preds
604    if (I->first->Node->getOpcode() != ISD::CopyFromReg)
605      Scratches++;
606  }
607  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
608       I != E; ++I) {
609    if (I->second) continue;  // ignore chain succs
610    if (I->first->Node->getOpcode() != ISD::CopyToReg)
611      Scratches += 10;
612  }
613  return Scratches;
614}
615
616// Bottom up
617bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
618  bool LIsTarget = left->Node->isTargetOpcode();
619  bool RIsTarget = right->Node->isTargetOpcode();
620
621  // Special tie breaker: if two nodes share a operand, the one that use it
622  // as a def&use operand is preferred.
623  if (LIsTarget && RIsTarget) {
624    if (left->isTwoAddress && !right->isTwoAddress)
625      if (SPQ->isDUOperand(left, right))
626        return false;
627    if (!left->isTwoAddress && right->isTwoAddress)
628      if (SPQ->isDUOperand(right, left))
629        return true;
630  }
631
632  unsigned LPriority = SPQ->getNodePriority(left);
633  unsigned RPriority = SPQ->getNodePriority(right);
634  if (LPriority > RPriority)
635    return true;
636  else if (LPriority == RPriority) {
637    // Try schedule def + use closer whne Sethi-Ullman numbers are the same.
638    // e.g.
639    // t1 = op t2, c1
640    // t3 = op t4, c2
641    //
642    // and the following instructions are both ready.
643    // t2 = op c3
644    // t4 = op c4
645    //
646    // Then schedule t2 = op first.
647    // i.e.
648    // t4 = op c4
649    // t2 = op c3
650    // t1 = op t2, c1
651    // t3 = op t4, c2
652    //
653    // This creates more short live intervals.
654    unsigned LDist = closestSucc(left);
655    unsigned RDist = closestSucc(right);
656    if (LDist < RDist)
657      return true;
658    else if (LDist == RDist) {
659      // Intuitively, it's good to push down instructions whose results are
660      // liveout so their long live ranges won't conflict with other values
661      // which are needed inside the BB. Further prioritize liveout instructions
662      // by the number of operands which are calculated within the BB.
663      unsigned LScratch = calcMaxScratches(left);
664      unsigned RScratch = calcMaxScratches(right);
665      if (LScratch > RScratch)
666        return true;
667      else if (LScratch == RScratch)
668        if (left->Height > right->Height)
669          return true;
670        else if (left->Height == right->Height)
671          if (left->Depth < right->Depth)
672            return true;
673          else if (left->Depth == right->Depth)
674            if (left->CycleBound > right->CycleBound)
675              return true;
676    }
677  }
678  return false;
679}
680
681// FIXME: This is probably too slow!
682static void isReachable(SUnit *SU, SUnit *TargetSU,
683                        std::set<SUnit *> &Visited, bool &Reached) {
684  if (Reached) return;
685  if (SU == TargetSU) {
686    Reached = true;
687    return;
688  }
689  if (!Visited.insert(SU).second) return;
690
691  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E;
692       ++I)
693    isReachable(I->first, TargetSU, Visited, Reached);
694}
695
696static bool isReachable(SUnit *SU, SUnit *TargetSU) {
697  std::set<SUnit *> Visited;
698  bool Reached = false;
699  isReachable(SU, TargetSU, Visited, Reached);
700  return Reached;
701}
702
703template<class SF>
704bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
705  if (SU->isTwoAddress) {
706    unsigned Opc = SU->Node->getTargetOpcode();
707    unsigned NumRes = ScheduleDAG::CountResults(SU->Node);
708    unsigned NumOps = ScheduleDAG::CountOperands(SU->Node);
709    for (unsigned i = 0; i != NumOps; ++i) {
710      if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
711        SDNode *DU = SU->Node->getOperand(i).Val;
712        if (Op == (*SUnitMap)[DU])
713          return true;
714      }
715    }
716  }
717  return false;
718}
719
720
721/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
722/// it as a def&use operand. Add a pseudo control edge from it to the other
723/// node (if it won't create a cycle) so the two-address one will be scheduled
724/// first (lower in the schedule).
725template<class SF>
726void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
727  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
728    SUnit *SU = (SUnit *)&((*SUnits)[i]);
729    if (!SU->isTwoAddress)
730      continue;
731
732    SDNode *Node = SU->Node;
733    if (!Node->isTargetOpcode())
734      continue;
735
736    unsigned Opc = Node->getTargetOpcode();
737    unsigned NumRes = ScheduleDAG::CountResults(Node);
738    unsigned NumOps = ScheduleDAG::CountOperands(Node);
739    for (unsigned j = 0; j != NumOps; ++j) {
740      if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
741        SDNode *DU = SU->Node->getOperand(j).Val;
742        SUnit *DUSU = (*SUnitMap)[DU];
743        if (!DUSU) continue;
744        for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
745             I != E; ++I) {
746          if (I->second) continue;
747          SUnit *SuccSU = I->first;
748          if (SuccSU != SU &&
749              (!canClobber(SuccSU, DUSU) ||
750               (!SU->isCommutable && SuccSU->isCommutable))){
751            if (SuccSU->Depth == SU->Depth && !isReachable(SuccSU, SU)) {
752              DOUT << "Adding an edge from SU # " << SU->NodeNum
753                   << " to SU #" << SuccSU->NodeNum << "\n";
754              if (SU->addPred(SuccSU, true))
755                SU->NumChainPredsLeft++;
756              if (SuccSU->addSucc(SU, true))
757                SuccSU->NumChainSuccsLeft++;
758            }
759          }
760        }
761      }
762    }
763  }
764}
765
766/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
767/// Smaller number is the higher priority.
768template<class SF>
769unsigned BURegReductionPriorityQueue<SF>::
770CalcNodeSethiUllmanNumber(const SUnit *SU) {
771  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
772  if (SethiUllmanNumber != 0)
773    return SethiUllmanNumber;
774
775  unsigned Extra = 0;
776  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
777       I != E; ++I) {
778    if (I->second) continue;  // ignore chain preds
779    SUnit *PredSU = I->first;
780    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
781    if (PredSethiUllman > SethiUllmanNumber) {
782      SethiUllmanNumber = PredSethiUllman;
783      Extra = 0;
784    } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
785      Extra++;
786  }
787
788  SethiUllmanNumber += Extra;
789
790  if (SethiUllmanNumber == 0)
791    SethiUllmanNumber = 1;
792
793  return SethiUllmanNumber;
794}
795
796/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
797/// scheduling units.
798template<class SF>
799void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
800  SethiUllmanNumbers.assign(SUnits->size(), 0);
801
802  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
803    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
804}
805
806static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
807  unsigned Sum = 0;
808  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
809       I != E; ++I) {
810    SUnit *SuccSU = I->first;
811    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
812         EE = SuccSU->Preds.end(); II != EE; ++II) {
813      SUnit *PredSU = II->first;
814      if (!PredSU->isScheduled)
815        Sum++;
816    }
817  }
818
819  return Sum;
820}
821
822
823// Top down
824bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
825  unsigned LPriority = SPQ->getNodePriority(left);
826  unsigned RPriority = SPQ->getNodePriority(right);
827  bool LIsTarget = left->Node->isTargetOpcode();
828  bool RIsTarget = right->Node->isTargetOpcode();
829  bool LIsFloater = LIsTarget && left->NumPreds == 0;
830  bool RIsFloater = RIsTarget && right->NumPreds == 0;
831  unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0;
832  unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0;
833
834  if (left->NumSuccs == 0 && right->NumSuccs != 0)
835    return false;
836  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
837    return true;
838
839  // Special tie breaker: if two nodes share a operand, the one that use it
840  // as a def&use operand is preferred.
841  if (LIsTarget && RIsTarget) {
842    if (left->isTwoAddress && !right->isTwoAddress) {
843      SDNode *DUNode = left->Node->getOperand(0).Val;
844      if (DUNode->isOperand(right->Node))
845        RBonus += 2;
846    }
847    if (!left->isTwoAddress && right->isTwoAddress) {
848      SDNode *DUNode = right->Node->getOperand(0).Val;
849      if (DUNode->isOperand(left->Node))
850        LBonus += 2;
851    }
852  }
853  if (LIsFloater)
854    LBonus -= 2;
855  if (RIsFloater)
856    RBonus -= 2;
857  if (left->NumSuccs == 1)
858    LBonus += 2;
859  if (right->NumSuccs == 1)
860    RBonus += 2;
861
862  if (LPriority+LBonus < RPriority+RBonus)
863    return true;
864  else if (LPriority == RPriority)
865    if (left->Depth < right->Depth)
866      return true;
867    else if (left->Depth == right->Depth)
868      if (left->NumSuccsLeft > right->NumSuccsLeft)
869        return true;
870      else if (left->NumSuccsLeft == right->NumSuccsLeft)
871        if (left->CycleBound > right->CycleBound)
872          return true;
873  return false;
874}
875
876/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
877/// Smaller number is the higher priority.
878template<class SF>
879unsigned TDRegReductionPriorityQueue<SF>::
880CalcNodeSethiUllmanNumber(const SUnit *SU) {
881  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
882  if (SethiUllmanNumber != 0)
883    return SethiUllmanNumber;
884
885  unsigned Opc = SU->Node->getOpcode();
886  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
887    SethiUllmanNumber = 0xffff;
888  else if (SU->NumSuccsLeft == 0)
889    // If SU does not have a use, i.e. it doesn't produce a value that would
890    // be consumed (e.g. store), then it terminates a chain of computation.
891    // Give it a small SethiUllman number so it will be scheduled right before
892    // its predecessors that it doesn't lengthen their live ranges.
893    SethiUllmanNumber = 0;
894  else if (SU->NumPredsLeft == 0 &&
895           (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
896    SethiUllmanNumber = 0xffff;
897  else {
898    int Extra = 0;
899    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
900         I != E; ++I) {
901      if (I->second) continue;  // ignore chain preds
902      SUnit *PredSU = I->first;
903      unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
904      if (PredSethiUllman > SethiUllmanNumber) {
905        SethiUllmanNumber = PredSethiUllman;
906        Extra = 0;
907      } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
908        Extra++;
909    }
910
911    SethiUllmanNumber += Extra;
912  }
913
914  return SethiUllmanNumber;
915}
916
917/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
918/// scheduling units.
919template<class SF>
920void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
921  SethiUllmanNumbers.assign(SUnits->size(), 0);
922
923  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
924    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
925}
926
927//===----------------------------------------------------------------------===//
928//                         Public Constructor Functions
929//===----------------------------------------------------------------------===//
930
931llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
932                                                    SelectionDAG *DAG,
933                                                    MachineBasicBlock *BB) {
934  const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo();
935  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true,
936                           new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII));
937}
938
939llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
940                                                    SelectionDAG *DAG,
941                                                    MachineBasicBlock *BB) {
942  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false,
943                              new TDRegReductionPriorityQueue<td_ls_rr_sort>());
944}
945
946