ScheduleDAGRRList.cpp revision 713a98dee8ab07a3066d1707a07648d27dd0c19c
1//===----- ScheduleDAGList.cpp - Reg pressure reduction list scheduler ----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Evan Cheng and is distributed under the
6// University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "llvm/CodeGen/ScheduleDAG.h"
20#include "llvm/CodeGen/SchedulerRegistry.h"
21#include "llvm/CodeGen/SSARegMap.h"
22#include "llvm/Target/MRegisterInfo.h"
23#include "llvm/Target/TargetData.h"
24#include "llvm/Target/TargetMachine.h"
25#include "llvm/Target/TargetInstrInfo.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/Compiler.h"
28#include "llvm/ADT/Statistic.h"
29#include <climits>
30#include <queue>
31#include "llvm/Support/CommandLine.h"
32using namespace llvm;
33
34static RegisterScheduler
35  burrListDAGScheduler("list-burr",
36                       "  Bottom-up register reduction list scheduling",
37                       createBURRListDAGScheduler);
38static RegisterScheduler
39  tdrListrDAGScheduler("list-tdrr",
40                       "  Top-down register reduction list scheduling",
41                       createTDRRListDAGScheduler);
42
43namespace {
44//===----------------------------------------------------------------------===//
45/// ScheduleDAGRRList - The actual register reduction list scheduler
46/// implementation.  This supports both top-down and bottom-up scheduling.
47///
48class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
49private:
50  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
51  /// it is top-down.
52  bool isBottomUp;
53
54  /// AvailableQueue - The priority queue to use for the available SUnits.
55  ///
56  SchedulingPriorityQueue *AvailableQueue;
57
58public:
59  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
60                  const TargetMachine &tm, bool isbottomup,
61                  SchedulingPriorityQueue *availqueue)
62    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
63      AvailableQueue(availqueue) {
64    }
65
66  ~ScheduleDAGRRList() {
67    delete AvailableQueue;
68  }
69
70  void Schedule();
71
72private:
73  void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle);
74  void ReleaseSucc(SUnit *SuccSU, bool isChain, unsigned CurCycle);
75  void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle);
76  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
77  void ListScheduleTopDown();
78  void ListScheduleBottomUp();
79  void CommuteNodesToReducePressure();
80};
81}  // end anonymous namespace
82
83
84/// Schedule - Schedule the DAG using list scheduling.
85void ScheduleDAGRRList::Schedule() {
86  DOUT << "********** List Scheduling **********\n";
87
88  // Build scheduling units.
89  BuildSchedUnits();
90
91  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
92          SUnits[su].dumpAll(&DAG));
93  CalculateDepths();
94  CalculateHeights();
95
96  AvailableQueue->initNodes(SUnitMap, SUnits);
97
98  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
99  if (isBottomUp)
100    ListScheduleBottomUp();
101  else
102    ListScheduleTopDown();
103
104  AvailableQueue->releaseState();
105
106  CommuteNodesToReducePressure();
107
108  DOUT << "*** Final schedule ***\n";
109  DEBUG(dumpSchedule());
110  DOUT << "\n";
111
112  // Emit in scheduled order
113  EmitSchedule();
114}
115
116/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
117/// it is not the last use of its first operand, add it to the CommuteSet if
118/// possible. It will be commuted when it is translated to a MI.
119void ScheduleDAGRRList::CommuteNodesToReducePressure() {
120  SmallPtrSet<SUnit*, 4> OperandSeen;
121  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
122    SUnit *SU = Sequence[i];
123    if (!SU) continue;
124    if (SU->isCommutable) {
125      unsigned Opc = SU->Node->getTargetOpcode();
126      unsigned NumRes = TII->getNumDefs(Opc);
127      unsigned NumOps = CountOperands(SU->Node);
128      for (unsigned j = 0; j != NumOps; ++j) {
129        if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1)
130          continue;
131
132        SDNode *OpN = SU->Node->getOperand(j).Val;
133        SUnit *OpSU = SUnitMap[OpN];
134        if (OpSU && OperandSeen.count(OpSU) == 1) {
135          // Ok, so SU is not the last use of OpSU, but SU is two-address so
136          // it will clobber OpSU. Try to commute SU if no other source operands
137          // are live below.
138          bool DoCommute = true;
139          for (unsigned k = 0; k < NumOps; ++k) {
140            if (k != j) {
141              OpN = SU->Node->getOperand(k).Val;
142              OpSU = SUnitMap[OpN];
143              if (OpSU && OperandSeen.count(OpSU) == 1) {
144                DoCommute = false;
145                break;
146              }
147            }
148          }
149          if (DoCommute)
150            CommuteSet.insert(SU->Node);
151        }
152
153        // Only look at the first use&def node for now.
154        break;
155      }
156    }
157
158    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
159         I != E; ++I) {
160      if (!I->isCtrl)
161        OperandSeen.insert(I->Dep);
162    }
163  }
164}
165
166//===----------------------------------------------------------------------===//
167//  Bottom-Up Scheduling
168//===----------------------------------------------------------------------===//
169
170/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
171/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
172void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
173                                    unsigned CurCycle) {
174  // FIXME: the distance between two nodes is not always == the predecessor's
175  // latency. For example, the reader can very well read the register written
176  // by the predecessor later than the issue cycle. It also depends on the
177  // interrupt model (drain vs. freeze).
178  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);
179
180  if (!isChain)
181    PredSU->NumSuccsLeft--;
182  else
183    PredSU->NumChainSuccsLeft--;
184
185#ifndef NDEBUG
186  if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
187    cerr << "*** List scheduling failed! ***\n";
188    PredSU->dump(&DAG);
189    cerr << " has been released too many times!\n";
190    assert(0);
191  }
192#endif
193
194  if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
195    // EntryToken has to go last!  Special case it here.
196    if (PredSU->Node->getOpcode() != ISD::EntryToken) {
197      PredSU->isAvailable = true;
198      AvailableQueue->push(PredSU);
199    }
200  }
201}
202
203/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
204/// count of its predecessors. If a predecessor pending count is zero, add it to
205/// the Available queue.
206void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
207  DOUT << "*** Scheduling [" << CurCycle << "]: ";
208  DEBUG(SU->dump(&DAG));
209  SU->Cycle = CurCycle;
210
211  AvailableQueue->ScheduledNode(SU);
212  Sequence.push_back(SU);
213
214  // Bottom up: release predecessors
215  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
216       I != E; ++I)
217    ReleasePred(I->Dep, I->isCtrl, CurCycle);
218  SU->isScheduled = true;
219}
220
221/// isReady - True if node's lower cycle bound is less or equal to the current
222/// scheduling cycle. Always true if all nodes have uniform latency 1.
223static inline bool isReady(SUnit *SU, unsigned CurCycle) {
224  return SU->CycleBound <= CurCycle;
225}
226
227/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
228/// schedulers.
229void ScheduleDAGRRList::ListScheduleBottomUp() {
230  unsigned CurCycle = 0;
231  // Add root to Available queue.
232  AvailableQueue->push(SUnitMap[DAG.getRoot().Val]);
233
234  // While Available queue is not empty, grab the node with the highest
235  // priority. If it is not ready put it back.  Schedule the node.
236  std::vector<SUnit*> NotReady;
237  while (!AvailableQueue->empty()) {
238    SUnit *CurNode = AvailableQueue->pop();
239    while (CurNode && !isReady(CurNode, CurCycle)) {
240      NotReady.push_back(CurNode);
241      CurNode = AvailableQueue->pop();
242    }
243
244    // Add the nodes that aren't ready back onto the available list.
245    AvailableQueue->push_all(NotReady);
246    NotReady.clear();
247
248    if (CurNode != NULL)
249      ScheduleNodeBottomUp(CurNode, CurCycle);
250    CurCycle++;
251  }
252
253  // Add entry node last
254  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
255    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
256    Sequence.push_back(Entry);
257  }
258
259  // Reverse the order if it is bottom up.
260  std::reverse(Sequence.begin(), Sequence.end());
261
262
263#ifndef NDEBUG
264  // Verify that all SUnits were scheduled.
265  bool AnyNotSched = false;
266  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
267    if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) {
268      if (!AnyNotSched)
269        cerr << "*** List scheduling failed! ***\n";
270      SUnits[i].dump(&DAG);
271      cerr << "has not been scheduled!\n";
272      AnyNotSched = true;
273    }
274  }
275  assert(!AnyNotSched);
276#endif
277}
278
279//===----------------------------------------------------------------------===//
280//  Top-Down Scheduling
281//===----------------------------------------------------------------------===//
282
283/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
284/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
285void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
286                                    unsigned CurCycle) {
287  // FIXME: the distance between two nodes is not always == the predecessor's
288  // latency. For example, the reader can very well read the register written
289  // by the predecessor later than the issue cycle. It also depends on the
290  // interrupt model (drain vs. freeze).
291  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);
292
293  if (!isChain)
294    SuccSU->NumPredsLeft--;
295  else
296    SuccSU->NumChainPredsLeft--;
297
298#ifndef NDEBUG
299  if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) {
300    cerr << "*** List scheduling failed! ***\n";
301    SuccSU->dump(&DAG);
302    cerr << " has been released too many times!\n";
303    assert(0);
304  }
305#endif
306
307  if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
308    SuccSU->isAvailable = true;
309    AvailableQueue->push(SuccSU);
310  }
311}
312
313
314/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
315/// count of its successors. If a successor pending count is zero, add it to
316/// the Available queue.
317void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
318  DOUT << "*** Scheduling [" << CurCycle << "]: ";
319  DEBUG(SU->dump(&DAG));
320  SU->Cycle = CurCycle;
321
322  AvailableQueue->ScheduledNode(SU);
323  Sequence.push_back(SU);
324
325  // Top down: release successors
326  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
327       I != E; ++I)
328    ReleaseSucc(I->Dep, I->isCtrl, CurCycle);
329  SU->isScheduled = true;
330}
331
332/// ListScheduleTopDown - The main loop of list scheduling for top-down
333/// schedulers.
334void ScheduleDAGRRList::ListScheduleTopDown() {
335  unsigned CurCycle = 0;
336  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
337
338  // All leaves to Available queue.
339  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
340    // It is available if it has no predecessors.
341    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
342      AvailableQueue->push(&SUnits[i]);
343      SUnits[i].isAvailable = true;
344    }
345  }
346
347  // Emit the entry node first.
348  ScheduleNodeTopDown(Entry, CurCycle);
349  CurCycle++;
350
351  // While Available queue is not empty, grab the node with the highest
352  // priority. If it is not ready put it back.  Schedule the node.
353  std::vector<SUnit*> NotReady;
354  while (!AvailableQueue->empty()) {
355    SUnit *CurNode = AvailableQueue->pop();
356    while (CurNode && !isReady(CurNode, CurCycle)) {
357      NotReady.push_back(CurNode);
358      CurNode = AvailableQueue->pop();
359    }
360
361    // Add the nodes that aren't ready back onto the available list.
362    AvailableQueue->push_all(NotReady);
363    NotReady.clear();
364
365    if (CurNode != NULL)
366      ScheduleNodeTopDown(CurNode, CurCycle);
367    CurCycle++;
368  }
369
370
371#ifndef NDEBUG
372  // Verify that all SUnits were scheduled.
373  bool AnyNotSched = false;
374  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
375    if (!SUnits[i].isScheduled) {
376      if (!AnyNotSched)
377        cerr << "*** List scheduling failed! ***\n";
378      SUnits[i].dump(&DAG);
379      cerr << "has not been scheduled!\n";
380      AnyNotSched = true;
381    }
382  }
383  assert(!AnyNotSched);
384#endif
385}
386
387
388
389//===----------------------------------------------------------------------===//
390//                RegReductionPriorityQueue Implementation
391//===----------------------------------------------------------------------===//
392//
393// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
394// to reduce register pressure.
395//
396namespace {
397  template<class SF>
398  class RegReductionPriorityQueue;
399
400  /// Sorting functions for the Available queue.
401  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
402    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
403    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
404    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
405
406    bool operator()(const SUnit* left, const SUnit* right) const;
407  };
408
409  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
410    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
411    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
412    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
413
414    bool operator()(const SUnit* left, const SUnit* right) const;
415  };
416}  // end anonymous namespace
417
418static inline bool isCopyFromLiveIn(const SUnit *SU) {
419  SDNode *N = SU->Node;
420  return N->getOpcode() == ISD::CopyFromReg &&
421    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
422}
423
424namespace {
425  template<class SF>
426  class VISIBILITY_HIDDEN RegReductionPriorityQueue
427   : public SchedulingPriorityQueue {
428    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;
429
430  public:
431    RegReductionPriorityQueue() :
432    Queue(SF(this)) {}
433
434    virtual void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
435                           std::vector<SUnit> &sunits) {}
436    virtual void releaseState() {}
437
438    virtual unsigned getNodePriority(const SUnit *SU) const {
439      return 0;
440    }
441
442    bool empty() const { return Queue.empty(); }
443
444    void push(SUnit *U) {
445      Queue.push(U);
446    }
447    void push_all(const std::vector<SUnit *> &Nodes) {
448      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
449        Queue.push(Nodes[i]);
450    }
451
452    SUnit *pop() {
453      if (empty()) return NULL;
454      SUnit *V = Queue.top();
455      Queue.pop();
456      return V;
457    }
458
459    virtual bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
460      return false;
461    }
462  };
463
464  template<class SF>
465  class VISIBILITY_HIDDEN BURegReductionPriorityQueue
466   : public RegReductionPriorityQueue<SF> {
467    // SUnitMap SDNode to SUnit mapping (n -> 1).
468    DenseMap<SDNode*, SUnit*> *SUnitMap;
469
470    // SUnits - The SUnits for the current graph.
471    const std::vector<SUnit> *SUnits;
472
473    // SethiUllmanNumbers - The SethiUllman number for each node.
474    std::vector<unsigned> SethiUllmanNumbers;
475
476    const TargetInstrInfo *TII;
477  public:
478    explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii)
479      : TII(tii) {}
480
481    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
482                   std::vector<SUnit> &sunits) {
483      SUnitMap = &sumap;
484      SUnits = &sunits;
485      // Add pseudo dependency edges for two-address nodes.
486      AddPseudoTwoAddrDeps();
487      // Calculate node priorities.
488      CalculateSethiUllmanNumbers();
489    }
490
491    void releaseState() {
492      SUnits = 0;
493      SethiUllmanNumbers.clear();
494    }
495
496    unsigned getNodePriority(const SUnit *SU) const {
497      assert(SU->NodeNum < SethiUllmanNumbers.size());
498      unsigned Opc = SU->Node->getOpcode();
499      if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
500        // CopyFromReg should be close to its def because it restricts
501        // allocation choices. But if it is a livein then perhaps we want it
502        // closer to its uses so it can be coalesced.
503        return 0xffff;
504      else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
505        // CopyToReg should be close to its uses to facilitate coalescing and
506        // avoid spilling.
507        return 0;
508      else if (SU->NumSuccs == 0)
509        // If SU does not have a use, i.e. it doesn't produce a value that would
510        // be consumed (e.g. store), then it terminates a chain of computation.
511        // Give it a large SethiUllman number so it will be scheduled right
512        // before its predecessors that it doesn't lengthen their live ranges.
513        return 0xffff;
514      else if (SU->NumPreds == 0)
515        // If SU does not have a def, schedule it close to its uses because it
516        // does not lengthen any live ranges.
517        return 0;
518      else
519        return SethiUllmanNumbers[SU->NodeNum];
520    }
521
522    bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
523      unsigned Opc = SU1->Node->getTargetOpcode();
524      unsigned NumRes = TII->getNumDefs(Opc);
525      unsigned NumOps = ScheduleDAG::CountOperands(SU1->Node);
526      for (unsigned i = 0; i != NumOps; ++i) {
527        if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) == -1)
528          continue;
529        if (SU1->Node->getOperand(i).isOperand(SU2->Node))
530          return true;
531      }
532      return false;
533    }
534  private:
535    bool canClobber(SUnit *SU, SUnit *Op);
536    void AddPseudoTwoAddrDeps();
537    void CalculateSethiUllmanNumbers();
538    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
539  };
540
541
542  template<class SF>
543  class VISIBILITY_HIDDEN TDRegReductionPriorityQueue
544   : public RegReductionPriorityQueue<SF> {
545    // SUnitMap SDNode to SUnit mapping (n -> 1).
546    DenseMap<SDNode*, SUnit*> *SUnitMap;
547
548    // SUnits - The SUnits for the current graph.
549    const std::vector<SUnit> *SUnits;
550
551    // SethiUllmanNumbers - The SethiUllman number for each node.
552    std::vector<unsigned> SethiUllmanNumbers;
553
554  public:
555    TDRegReductionPriorityQueue() {}
556
557    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
558                   std::vector<SUnit> &sunits) {
559      SUnitMap = &sumap;
560      SUnits = &sunits;
561      // Calculate node priorities.
562      CalculateSethiUllmanNumbers();
563    }
564
565    void releaseState() {
566      SUnits = 0;
567      SethiUllmanNumbers.clear();
568    }
569
570    unsigned getNodePriority(const SUnit *SU) const {
571      assert(SU->NodeNum < SethiUllmanNumbers.size());
572      return SethiUllmanNumbers[SU->NodeNum];
573    }
574
575  private:
576    void CalculateSethiUllmanNumbers();
577    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
578  };
579}
580
581/// closestSucc - Returns the scheduled cycle of the successor which is
582/// closet to the current cycle.
583static unsigned closestSucc(const SUnit *SU) {
584  unsigned MaxCycle = 0;
585  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
586       I != E; ++I) {
587    unsigned Cycle = I->Dep->Cycle;
588    // If there are bunch of CopyToRegs stacked up, they should be considered
589    // to be at the same position.
590    if (I->Dep->Node->getOpcode() == ISD::CopyToReg)
591      Cycle = closestSucc(I->Dep)+1;
592    if (Cycle > MaxCycle)
593      MaxCycle = Cycle;
594  }
595  return MaxCycle;
596}
597
598/// calcMaxScratches - Returns an cost estimate of the worse case requirement
599/// for scratch registers. Live-in operands and live-out results don't count
600/// since they are "fixed".
601static unsigned calcMaxScratches(const SUnit *SU) {
602  unsigned Scratches = 0;
603  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
604       I != E; ++I) {
605    if (I->isCtrl) continue;  // ignore chain preds
606    if (I->Dep->Node->getOpcode() != ISD::CopyFromReg)
607      Scratches++;
608  }
609  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
610       I != E; ++I) {
611    if (I->isCtrl) continue;  // ignore chain succs
612    if (I->Dep->Node->getOpcode() != ISD::CopyToReg)
613      Scratches += 10;
614  }
615  return Scratches;
616}
617
618// Bottom up
619bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
620  // There used to be a special tie breaker here that looked for
621  // two-address instructions and preferred the instruction with a
622  // def&use operand.  The special case triggered diagnostics when
623  // _GLIBCXX_DEBUG was enabled because it broke the strict weak
624  // ordering that priority_queue requires. It didn't help much anyway
625  // because AddPseudoTwoAddrDeps already covers many of the cases
626  // where it would have applied.  In addition, it's counter-intuitive
627  // that a tie breaker would be the first thing attempted.  There's a
628  // "real" tie breaker below that is the operation of last resort.
629  // The fact that the "special tie breaker" would trigger when there
630  // wasn't otherwise a tie is what broke the strict weak ordering
631  // constraint.
632
633  unsigned LPriority = SPQ->getNodePriority(left);
634  unsigned RPriority = SPQ->getNodePriority(right);
635  if (LPriority > RPriority)
636    return true;
637  else if (LPriority == RPriority) {
638    // Try schedule def + use closer when Sethi-Ullman numbers are the same.
639    // e.g.
640    // t1 = op t2, c1
641    // t3 = op t4, c2
642    //
643    // and the following instructions are both ready.
644    // t2 = op c3
645    // t4 = op c4
646    //
647    // Then schedule t2 = op first.
648    // i.e.
649    // t4 = op c4
650    // t2 = op c3
651    // t1 = op t2, c1
652    // t3 = op t4, c2
653    //
654    // This creates more short live intervals.
655    unsigned LDist = closestSucc(left);
656    unsigned RDist = closestSucc(right);
657    if (LDist < RDist)
658      return true;
659    else if (LDist == RDist) {
660      // Intuitively, it's good to push down instructions whose results are
661      // liveout so their long live ranges won't conflict with other values
662      // which are needed inside the BB. Further prioritize liveout instructions
663      // by the number of operands which are calculated within the BB.
664      unsigned LScratch = calcMaxScratches(left);
665      unsigned RScratch = calcMaxScratches(right);
666      if (LScratch > RScratch)
667        return true;
668      else if (LScratch == RScratch)
669        if (left->Height > right->Height)
670          return true;
671        else if (left->Height == right->Height)
672          if (left->Depth < right->Depth)
673            return true;
674          else if (left->Depth == right->Depth)
675            if (left->CycleBound > right->CycleBound)
676              return true;
677    }
678  }
679  return false;
680}
681
682// FIXME: This is probably too slow!
683static void isReachable(SUnit *SU, SUnit *TargetSU,
684                        SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) {
685  if (Reached) return;
686  if (SU == TargetSU) {
687    Reached = true;
688    return;
689  }
690  if (!Visited.insert(SU)) return;
691
692  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E;
693       ++I)
694    isReachable(I->Dep, TargetSU, Visited, Reached);
695}
696
697static bool isReachable(SUnit *SU, SUnit *TargetSU) {
698  SmallPtrSet<SUnit*, 32> Visited;
699  bool Reached = false;
700  isReachable(SU, TargetSU, Visited, Reached);
701  return Reached;
702}
703
704template<class SF>
705bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
706  if (SU->isTwoAddress) {
707    unsigned Opc = SU->Node->getTargetOpcode();
708    unsigned NumRes = TII->getNumDefs(Opc);
709    unsigned NumOps = ScheduleDAG::CountOperands(SU->Node);
710    for (unsigned i = 0; i != NumOps; ++i) {
711      if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
712        SDNode *DU = SU->Node->getOperand(i).Val;
713        if (Op == (*SUnitMap)[DU])
714          return true;
715      }
716    }
717  }
718  return false;
719}
720
721
722/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
723/// it as a def&use operand. Add a pseudo control edge from it to the other
724/// node (if it won't create a cycle) so the two-address one will be scheduled
725/// first (lower in the schedule).
726template<class SF>
727void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
728  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
729    SUnit *SU = (SUnit *)&((*SUnits)[i]);
730    if (!SU->isTwoAddress)
731      continue;
732
733    SDNode *Node = SU->Node;
734    if (!Node->isTargetOpcode())
735      continue;
736
737    unsigned Opc = Node->getTargetOpcode();
738    unsigned NumRes = TII->getNumDefs(Opc);
739    unsigned NumOps = ScheduleDAG::CountOperands(Node);
740    for (unsigned j = 0; j != NumOps; ++j) {
741      if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
742        SDNode *DU = SU->Node->getOperand(j).Val;
743        SUnit *DUSU = (*SUnitMap)[DU];
744        if (!DUSU) continue;
745        for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
746             I != E; ++I) {
747          if (I->isCtrl) continue;
748          SUnit *SuccSU = I->Dep;
749          if (SuccSU != SU &&
750              (!canClobber(SuccSU, DUSU) ||
751               (!SU->isCommutable && SuccSU->isCommutable))){
752            if (SuccSU->Depth == SU->Depth && !isReachable(SuccSU, SU)) {
753              DOUT << "Adding an edge from SU # " << SU->NodeNum
754                   << " to SU #" << SuccSU->NodeNum << "\n";
755              if (SU->addPred(SuccSU, true))
756                SU->NumChainPredsLeft++;
757              if (SuccSU->addSucc(SU, true))
758                SuccSU->NumChainSuccsLeft++;
759            }
760          }
761        }
762      }
763    }
764  }
765}
766
767/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
768/// Smaller number is the higher priority.
769template<class SF>
770unsigned BURegReductionPriorityQueue<SF>::
771CalcNodeSethiUllmanNumber(const SUnit *SU) {
772  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
773  if (SethiUllmanNumber != 0)
774    return SethiUllmanNumber;
775
776  unsigned Extra = 0;
777  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
778       I != E; ++I) {
779    if (I->isCtrl) continue;  // ignore chain preds
780    SUnit *PredSU = I->Dep;
781    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
782    if (PredSethiUllman > SethiUllmanNumber) {
783      SethiUllmanNumber = PredSethiUllman;
784      Extra = 0;
785    } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
786      Extra++;
787  }
788
789  SethiUllmanNumber += Extra;
790
791  if (SethiUllmanNumber == 0)
792    SethiUllmanNumber = 1;
793
794  return SethiUllmanNumber;
795}
796
797/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
798/// scheduling units.
799template<class SF>
800void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
801  SethiUllmanNumbers.assign(SUnits->size(), 0);
802
803  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
804    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
805}
806
807static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
808  unsigned Sum = 0;
809  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
810       I != E; ++I) {
811    SUnit *SuccSU = I->Dep;
812    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
813         EE = SuccSU->Preds.end(); II != EE; ++II) {
814      SUnit *PredSU = II->Dep;
815      if (!PredSU->isScheduled)
816        Sum++;
817    }
818  }
819
820  return Sum;
821}
822
823
824// Top down
825bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
826  unsigned LPriority = SPQ->getNodePriority(left);
827  unsigned RPriority = SPQ->getNodePriority(right);
828  bool LIsTarget = left->Node->isTargetOpcode();
829  bool RIsTarget = right->Node->isTargetOpcode();
830  bool LIsFloater = LIsTarget && left->NumPreds == 0;
831  bool RIsFloater = RIsTarget && right->NumPreds == 0;
832  unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0;
833  unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0;
834
835  if (left->NumSuccs == 0 && right->NumSuccs != 0)
836    return false;
837  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
838    return true;
839
840  // Special tie breaker: if two nodes share a operand, the one that use it
841  // as a def&use operand is preferred.
842  if (LIsTarget && RIsTarget) {
843    if (left->isTwoAddress && !right->isTwoAddress) {
844      SDNode *DUNode = left->Node->getOperand(0).Val;
845      if (DUNode->isOperand(right->Node))
846        RBonus += 2;
847    }
848    if (!left->isTwoAddress && right->isTwoAddress) {
849      SDNode *DUNode = right->Node->getOperand(0).Val;
850      if (DUNode->isOperand(left->Node))
851        LBonus += 2;
852    }
853  }
854  if (LIsFloater)
855    LBonus -= 2;
856  if (RIsFloater)
857    RBonus -= 2;
858  if (left->NumSuccs == 1)
859    LBonus += 2;
860  if (right->NumSuccs == 1)
861    RBonus += 2;
862
863  if (LPriority+LBonus < RPriority+RBonus)
864    return true;
865  else if (LPriority == RPriority)
866    if (left->Depth < right->Depth)
867      return true;
868    else if (left->Depth == right->Depth)
869      if (left->NumSuccsLeft > right->NumSuccsLeft)
870        return true;
871      else if (left->NumSuccsLeft == right->NumSuccsLeft)
872        if (left->CycleBound > right->CycleBound)
873          return true;
874  return false;
875}
876
877/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
878/// Smaller number is the higher priority.
879template<class SF>
880unsigned TDRegReductionPriorityQueue<SF>::
881CalcNodeSethiUllmanNumber(const SUnit *SU) {
882  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
883  if (SethiUllmanNumber != 0)
884    return SethiUllmanNumber;
885
886  unsigned Opc = SU->Node->getOpcode();
887  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
888    SethiUllmanNumber = 0xffff;
889  else if (SU->NumSuccsLeft == 0)
890    // If SU does not have a use, i.e. it doesn't produce a value that would
891    // be consumed (e.g. store), then it terminates a chain of computation.
892    // Give it a small SethiUllman number so it will be scheduled right before
893    // its predecessors that it doesn't lengthen their live ranges.
894    SethiUllmanNumber = 0;
895  else if (SU->NumPredsLeft == 0 &&
896           (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
897    SethiUllmanNumber = 0xffff;
898  else {
899    int Extra = 0;
900    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
901         I != E; ++I) {
902      if (I->isCtrl) continue;  // ignore chain preds
903      SUnit *PredSU = I->Dep;
904      unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
905      if (PredSethiUllman > SethiUllmanNumber) {
906        SethiUllmanNumber = PredSethiUllman;
907        Extra = 0;
908      } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
909        Extra++;
910    }
911
912    SethiUllmanNumber += Extra;
913  }
914
915  return SethiUllmanNumber;
916}
917
918/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
919/// scheduling units.
920template<class SF>
921void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
922  SethiUllmanNumbers.assign(SUnits->size(), 0);
923
924  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
925    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
926}
927
928//===----------------------------------------------------------------------===//
929//                         Public Constructor Functions
930//===----------------------------------------------------------------------===//
931
932llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
933                                                    SelectionDAG *DAG,
934                                                    MachineBasicBlock *BB) {
935  const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo();
936  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true,
937                           new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII));
938}
939
940llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
941                                                    SelectionDAG *DAG,
942                                                    MachineBasicBlock *BB) {
943  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false,
944                              new TDRegReductionPriorityQueue<td_ls_rr_sort>());
945}
946
947