ScheduleDAGRRList.cpp revision 01f82cade49881cf4e294ede4fe688c07650b487
//===--- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include <climits>
#include <queue>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumCCCopies,   "Number of cross class copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "  Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "  Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);

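// With the standard llc driver, these registered schedulers can be selected
// on the command line, e.g.:
//   llc -pre-RA-sched=list-burr foo.bc
// (Illustrative invocation; assumes the usual -pre-RA-sched option through
// which the RegisterScheduler registry is exposed.)
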
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false
  /// if it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  ///
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegs / LiveRegDefs - A set of physical registers and their defining
  /// nodes that are "live". These nodes must be scheduled before any other
  /// node that modifies the registers can be scheduled.
  SmallSet<unsigned, 4> LiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

public:
  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
                    const TargetMachine &tm, bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
      AvailableQueue(availqueue) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

private:
  void ReleasePred(SUnit*, bool, unsigned);
  void ReleaseSucc(SUnit*, bool isChain, unsigned);
  void CapturePred(SUnit*, SUnit*, bool);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
                                  const TargetRegisterClass*,
                                  const TargetRegisterClass*,
                                  SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();
  void CommuteNodesToReducePressure();
};
}  // end anonymous namespace

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  LiveRegDefs.resize(MRI->getNumRegs(), NULL);
  LiveRegCycles.resize(MRI->getNumRegs(), 0);

  // Build scheduling units.
  BuildSchedUnits();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(&DAG));
  CalculateDepths();
  CalculateHeights();

  AvailableQueue->initNodes(SUnitMap, SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();

  CommuteNodesToReducePressure();

  DOUT << "*** Final schedule ***\n";
  DEBUG(dumpSchedule());
  DOUT << "\n";

  // Emit in scheduled order.
  EmitSchedule();
}

/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
/// it is not the last use of its first operand, add it to the CommuteSet if
/// possible. It will be commuted when it is translated to an MI.
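///
/// For example (illustrative pseudo operands, assuming operand 0 is tied to
/// the result of a commutable target instruction):
///   t1 = ADD t2, t3   // t1 must reuse t2's register
/// If t2 is still used by a node below this one but t3 is not, commuting to
///   t1 = ADD t3, t2
/// clobbers t3, whose live range ends here, instead of forcing a copy to
/// preserve t2.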
void ScheduleDAGRRList::CommuteNodesToReducePressure() {
  SmallPtrSet<SUnit*, 4> OperandSeen;
  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
    SUnit *SU = Sequence[i];
    if (!SU || !SU->Node) continue;
    if (SU->isCommutable) {
      unsigned Opc = SU->Node->getTargetOpcode();
      unsigned NumRes = TII->getNumDefs(Opc);
      unsigned NumOps = CountOperands(SU->Node);
      for (unsigned j = 0; j != NumOps; ++j) {
        if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1)
          continue;

        SDNode *OpN = SU->Node->getOperand(j).Val;
        SUnit *OpSU = SUnitMap[OpN][SU->InstanceNo];
        if (OpSU && OperandSeen.count(OpSU) == 1) {
          // Ok, so SU is not the last use of OpSU, but SU is two-address so
          // it will clobber OpSU. Try to commute SU if no other source operands
          // are live below.
          bool DoCommute = true;
          for (unsigned k = 0; k < NumOps; ++k) {
            if (k != j) {
              OpN = SU->Node->getOperand(k).Val;
              OpSU = SUnitMap[OpN][SU->InstanceNo];
              if (OpSU && OperandSeen.count(OpSU) == 1) {
                DoCommute = false;
                break;
              }
            }
          }
          if (DoCommute)
            CommuteSet.insert(SU->Node);
        }

        // Only look at the first use&def node for now.
        break;
      }
    }

    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (!I->isCtrl)
        OperandSeen.insert(I->Dep);
    }
  }
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
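  // CycleBound is the earliest cycle (counting from the bottom of the
  // schedule) at which PredSU may legally be scheduled, given the successors
  // scheduled so far.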
  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);

  if (!isChain)
    --PredSU->NumSuccsLeft;
  else
    --PredSU->NumChainSuccsLeft;

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    PredSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
    // EntryToken has to go last!  Special case it here.
    if (!PredSU->Node || PredSU->Node->getOpcode() != ISD::EntryToken) {
      PredSU->isAvailable = true;
      AvailableQueue->push(PredSU);
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Bottom up: release predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(I->Dep, I->isCtrl, CurCycle);
    if (I->Cost < 0) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (LiveRegs.insert(I->Reg)) {
        LiveRegDefs[I->Reg] = I->Dep;
        LiveRegCycles[I->Reg] = CurCycle;
      }
    }
  }

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      if (LiveRegCycles[I->Reg] == I->Dep->Cycle) {
        LiveRegs.erase(I->Reg);
        assert(LiveRegDefs[I->Reg] == SU &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = NULL;
        LiveRegCycles[I->Reg] = 0;
      }
    }
  }

  SU->isScheduled = true;
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) {
  PredSU->CycleBound = 0;
  for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end();
       I != E; ++I) {
    if (I->Dep == SU)
      continue;
    PredSU->CycleBound = std::max(PredSU->CycleBound,
                                  I->Dep->Cycle + PredSU->Latency);
  }

  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  if (!isChain)
    ++PredSU->NumSuccsLeft;
  else
    ++PredSU->NumChainSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule. Update its
/// state and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DOUT << "*** Unscheduling [" << SU->Cycle << "]: ";
  DEBUG(SU->dump(&DAG));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(I->Dep, SU, I->isCtrl);
    if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) {
      LiveRegs.erase(I->Reg);
      assert(LiveRegDefs[I->Reg] == I->Dep &&
             "Physical register dependency violated?");
      LiveRegDefs[I->Reg] = NULL;
      LiveRegCycles[I->Reg] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      if (LiveRegs.insert(I->Reg)) {
        assert(!LiveRegDefs[I->Reg] &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = SU;
      }
      if (I->Dep->Cycle < LiveRegCycles[I->Reg])
        LiveRegCycles[I->Reg] = I->Dep->Cycle;
    }
  }

  SU->Cycle = 0;
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

// FIXME: This is probably too slow!
static void isReachable(SUnit *SU, SUnit *TargetSU,
                        SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) {
  if (Reached) return;
  if (SU == TargetSU) {
    Reached = true;
    return;
  }
  if (!Visited.insert(SU)) return;

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E;
       ++I)
    isReachable(I->Dep, TargetSU, Visited, Reached);
}

/// isReachable - Returns true if TargetSU is reachable from SU via a chain
/// of predecessor edges.
static bool isReachable(SUnit *SU, SUnit *TargetSU) {
  SmallPtrSet<SUnit*, 32> Visited;
  bool Reached = false;
  isReachable(SU, TargetSU, Visited, Reached);
  return Reached;
}

/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
/// create a cycle.
static bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
  if (isReachable(TargetSU, SU))
    return true;
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (I->Cost < 0 && isReachable(TargetSU, I->Dep))
      return true;
  return false;
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node: unschedule nodes, updating
/// CurCycle, until the target cycle is reached.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
  }

  if (SU->isSucc(OldSU)) {
    assert(false && "Something is wrong!");
    abort();
  }

  ++NumBacktracks;
}

/// isSafeToCopy - True if the SUnit for the given SDNode can safely be cloned,
/// i.e. the node does not produce a flag, it does not read a flag and it does
/// not have an incoming chain.
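/// (Flag values and chains glue a node to specific neighbors in the output
/// code, so a clone could not be placed independently of the original.)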
static bool isSafeToCopy(SDNode *N) {
  if (!N)
    return true;

  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
    if (N->getValueType(i) == MVT::Flag)
      return false;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDOperand &Op = N->getOperand(i);
    MVT::ValueType VT = Op.Val->getValueType(Op.ResNo);
    if (VT == MVT::Other || VT == MVT::Flag)
      return false;
  }

  return true;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  DOUT << "Duplicating SU # " << SU->NodeNum << "\n";

  SUnit *NewSU = Clone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isSpecial) {
      NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost);
      NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1);
    }

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<SDep*, 2> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isSpecial)
      continue;
    NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1);
    if (I->Dep->isScheduled) {
      I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost);
      DelDeps.push_back(I);
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    SUnit *Succ = DelDeps[i]->Dep;
    bool isCtrl = DelDeps[i]->isCtrl;
    Succ->removePred(SU, isCtrl, false);
  }

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
/// and move all scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                             const TargetRegisterClass *DestRC,
                                             const TargetRegisterClass *SrcRC,
                                             SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = NewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;
  CopyFromSU->Depth = SU->Depth;
  CopyFromSU->Height = SU->Height;

  SUnit *CopyToSU = NewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<SDep*, 2> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isSpecial)
      continue;
    CopyToSU->Height = std::max(CopyToSU->Height, I->Dep->Height+1);
    if (I->Dep->isScheduled) {
      I->Dep->addPred(CopyToSU, I->isCtrl, false, I->Reg, I->Cost);
      DelDeps.push_back(I);
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    SUnit *Succ = DelDeps[i]->Dep;
    bool isCtrl = DelDeps[i]->isCtrl;
    Succ->removePred(SU, isCtrl, false);
  }

  CopyFromSU->addPred(SU, false, false, Reg, -1);
  CopyToSU->addPred(CopyFromSU, false, false, Reg, 1);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumCCCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT::ValueType getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                            const TargetInstrInfo *TII) {
  const TargetInstrDescriptor &TID = TII->get(N->getTargetOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.numDefs;
  for (const unsigned *ImpDef = TID.ImplicitDefs; *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
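  // Implicit defs are numbered after the explicit results, in list order:
  // e.g. with two explicit results and ImplicitDefs == { R0, R1, 0 },
  // querying R1 returns N->getValueType(3).  (R0/R1 are placeholder
  // register names.)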
  return N->getValueType(NumRes);
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register
/// dependencies. If the specific node is the last one that's available to
/// schedule, do whatever is necessary (i.e. backtracking or cloning) to make
/// it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (LiveRegs.empty())
    return false;

  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      unsigned Reg = I->Reg;
      if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep)
        LRegs.push_back(Reg);
      for (const unsigned *Alias = MRI->getAliasSet(Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep)
          LRegs.push_back(*Alias);
    }
  }

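  // Also check the implicit physical register defs of this node and of any
  // nodes flagged to it; they clobber their registers as well.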
  for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) {
    SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1];
    if (!Node || !Node->isTargetOpcode())
      continue;
    const TargetInstrDescriptor &TID = TII->get(Node->getTargetOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
      if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU)
        LRegs.push_back(*Reg);
      for (const unsigned *Alias = MRI->getAliasSet(*Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU)
          LRegs.push_back(*Alias);
    }
  }
  return !LRegs.empty();
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;
  // Add root to Available queue.
  SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front();
  RootSU->isAvailable = true;
  AvailableQueue->push(RootSU);

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      if (CurSU->CycleBound <= CurCycle) {
        SmallVector<unsigned, 4> LRegs;
        if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
          break;
        Delayed = true;
        LRegsMap.insert(std::make_pair(CurSU, LRegs));
      }

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          TrySU->addPred(OldSU, true, true);
          // If one or more successors have been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. Try duplicating the node that produces these
        // "expensive to copy" values to break the dependency. In case even
        // that doesn't work, insert cross class copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        SUnit *NewDef;
        if (isSafeToCopy(LRDef->Node))
          NewDef = CopyAndMoveSuccessors(LRDef);
        else {
          // Issue expensive cross register class copies.
          MVT::ValueType VT = getPhysicalRegisterVT(LRDef->Node, Reg, TII);
          const TargetRegisterClass *RC =
            MRI->getPhysicalRegisterRegClass(VT, Reg);
          const TargetRegisterClass *DestRC = MRI->getCrossCopyRegClass(RC);
          if (!DestRC) {
            assert(false && "Don't know how to copy this physical register!");
            abort();
          }
          SmallVector<SUnit*, 2> Copies;
          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DOUT << "Adding an edge from SU # " << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n";
          TrySU->addPred(Copies.front(), true, true);
          NewDef = Copies.back();
        }

        DOUT << "Adding an edge from SU # " << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n";
        LiveRegDefs[Reg] = NewDef;
        NewDef->addPred(TrySU, true, true);
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      if (!CurSU) {
        assert(false && "Unable to resolve live physical register dependencies!");
        abort();
      }
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeBottomUp(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    ++CurCycle;
  }

  // Add entry node last.
  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
    Sequence.push_back(Entry);
  }

  // Reverse the order since it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);

  if (!isChain)
    --SuccSU->NumPredsLeft;
  else
    --SuccSU->NumChainPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    SuccSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Top down: release successors.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(I->Dep, I->isCtrl, CurCycle);
  SU->isScheduled = true;
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // Emit the entry node first.
  ScheduleNodeTopDown(Entry, CurCycle);
  Sequence.push_back(Entry);
  ++CurCycle;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  std::vector<SUnit*> NotReady;
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU && CurSU->CycleBound > CurCycle) {
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // Add the nodes that aren't ready back onto the available list.
    AvailableQueue->push_all(NotReady);
    NotReady.clear();

    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeTopDown(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    ++CurCycle;
  }

#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (!SUnits[i].isScheduled) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}

//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
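// In the bottom-up queue below, a node's Sethi-Ullman number is the maximum
// of its operands' numbers plus one for each additional operand that ties
// that maximum (leaves get 1). Illustrative example: for t1 = op t2, t3 with
// leaf operands t2 and t3 (each numbered 1), t1 gets 1 + 1 = 2. Subtrees
// with higher numbers need more registers, so ordering by these numbers
// keeps fewer values live at once.
//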
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

static inline bool isCopyFromLiveIn(const SUnit *SU) {
  SDNode *N = SU->Node;
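  // A CopyFromReg whose last operand is not a Flag is not glued to a defining
  // node in this block, i.e. it is reading a live-in register.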
  return N && N->getOpcode() == ISD::CopyFromReg &&
    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
}

namespace {
  template<class SF>
  class VISIBILITY_HIDDEN RegReductionPriorityQueue
   : public SchedulingPriorityQueue {
    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;

  public:
    RegReductionPriorityQueue() : Queue(SF(this)) {}

    virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                           std::vector<SUnit> &sunits) {}

    virtual void addNode(const SUnit *SU) {}

    virtual void updateNode(const SUnit *SU) {}

    virtual void releaseState() {}

    virtual unsigned getNodePriority(const SUnit *SU) const {
      return 0;
    }

    unsigned size() const { return Queue.size(); }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      Queue.push(U);
    }
    void push_all(const std::vector<SUnit *> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        Queue.push(Nodes[i]);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      return V;
    }

    /// remove - This is a really inefficient way to remove a node from a
    /// priority queue.  We should roll our own heap to make this better or
    /// something.
    void remove(SUnit *SU) {
      std::vector<SUnit*> Temp;

      assert(!Queue.empty() && "Not in queue!");
      while (Queue.top() != SU) {
        Temp.push_back(Queue.top());
        Queue.pop();
        assert(!Queue.empty() && "Not in queue!");
      }

      // Remove the node from the PQ.
      Queue.pop();

      // Add all the other nodes back.
      for (unsigned i = 0, e = Temp.size(); i != e; ++i)
        Queue.push(Temp[i]);
    }
  };

  template<class SF>
  class VISIBILITY_HIDDEN BURegReductionPriorityQueue
   : public RegReductionPriorityQueue<SF> {
    // SUnitMap - SDNode to SUnit mapping (n -> n).
    DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap;

    // SUnits - The SUnits for the current graph.
    const std::vector<SUnit> *SUnits;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

    const TargetInstrInfo *TII;
  public:
    explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii)
      : TII(tii) {}

    void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                   std::vector<SUnit> &sunits) {
      SUnitMap = &sumap;
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      SethiUllmanNumbers.resize(SUnits->size(), 0);
      CalcNodeSethiUllmanNumber(SU);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0;
      if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
        // CopyFromReg should be close to its def because it restricts
        // allocation choices. But if it is a livein then perhaps we want it
        // closer to its uses so it can be coalesced.
        return 0xffff;
      else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      else if (SU->NumSuccs == 0)
        // If SU does not have a use, i.e. it doesn't produce a value that
        // would be consumed (e.g. store), then it terminates a chain of
        // computation. Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors, and therefore doesn't
        // lengthen their live ranges.
        return 0xffff;
      else if (SU->NumPreds == 0)
        // If SU does not have a def, schedule it close to its uses because it
        // does not lengthen any live ranges.
        return 0;
      else
        return SethiUllmanNumbers[SU->NodeNum];
    }

  private:
    bool canClobber(SUnit *SU, SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void CalculateSethiUllmanNumbers();
    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
  };

  template<class SF>
  class VISIBILITY_HIDDEN TDRegReductionPriorityQueue
   : public RegReductionPriorityQueue<SF> {
    // SUnitMap - SDNode to SUnit mapping (n -> n).
    DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap;

    // SUnits - The SUnits for the current graph.
    const std::vector<SUnit> *SUnits;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    TDRegReductionPriorityQueue() {}

    void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                   std::vector<SUnit> &sunits) {
      SUnitMap = &sumap;
      SUnits = &sunits;
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      SethiUllmanNumbers.resize(SUnits->size(), 0);
      CalcNodeSethiUllmanNumber(SU);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      return SethiUllmanNumbers[SU->NodeNum];
    }

  private:
    void CalculateSethiUllmanNumbers();
    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
  };
}  // end anonymous namespace

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle. Since scheduling is bottom-up, this is the
/// maximum scheduled cycle among the successors.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxCycle = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned Cycle = I->Dep->Cycle;
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->Dep->Node && I->Dep->Node->getOpcode() == ISD::CopyToReg)
      Cycle = closestSucc(I->Dep)+1;
    if (Cycle > MaxCycle)
      MaxCycle = Cycle;
  }
  return MaxCycle;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers. Live-in operands and live-out results don't count
/// since they are "fixed".
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // ignore chain preds
    if (!I->Dep->Node || I->Dep->Node->getOpcode() != ISD::CopyFromReg)
      Scratches++;
  }
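  // Non-copy uses are weighted ten times more heavily than non-copy operands,
  // presumably because a result feeding real computation must be kept live
  // until each such use.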
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // ignore chain succs
    if (!I->Dep->Node || I->Dep->Node->getOpcode() != ISD::CopyToReg)
      Scratches += 10;
  }
  return Scratches;
}

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  // There used to be a special tie breaker here that looked for
  // two-address instructions and preferred the instruction with a
  // def&use operand.  The special case triggered diagnostics when
  // _GLIBCXX_DEBUG was enabled because it broke the strict weak
  // ordering that priority_queue requires. It didn't help much anyway
  // because AddPseudoTwoAddrDeps already covers many of the cases
  // where it would have applied.  In addition, it's counter-intuitive
  // that a tie breaker would be the first thing attempted.  There's a
  // "real" tie breaker below that is the operation of last resort.
  // The fact that the "special tie breaker" would trigger when there
  // wasn't otherwise a tie is what broke the strict weak ordering
  // constraint.

  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority > RPriority)
    return true;
  else if (LPriority == RPriority) {
    // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
    // e.g.
    // t1 = op t2, c1
    // t3 = op t4, c2
    //
    // and the following instructions are both ready.
    // t2 = op c3
    // t4 = op c4
    //
    // Then schedule t2 = op first.
    // i.e.
    // t4 = op c4
    // t2 = op c3
    // t1 = op t2, c1
    // t3 = op t4, c2
    //
    // This creates more short live intervals.
    unsigned LDist = closestSucc(left);
    unsigned RDist = closestSucc(right);
    if (LDist < RDist)
      return true;
    else if (LDist == RDist) {
      // Intuitively, it's good to push down instructions whose results are
      // liveout so their long live ranges won't conflict with other values
      // which are needed inside the BB. Further prioritize liveout instructions
      // by the number of operands which are calculated within the BB.
      unsigned LScratch = calcMaxScratches(left);
      unsigned RScratch = calcMaxScratches(right);
      if (LScratch > RScratch)
        return true;
      else if (LScratch == RScratch)
        if (left->Height > right->Height)
          return true;
        else if (left->Height == right->Height)
          if (left->Depth < right->Depth)
            return true;
          else if (left->Depth == right->Depth)
            if (left->CycleBound > right->CycleBound)
              return true;
    }
  }
  return false;
}

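/// canClobber - Returns true if SU is a two-address instruction and one of
/// its tied operands is the value produced by Op, i.e. scheduling SU will
/// clobber the register that Op defines.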
template<class SF>
bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->Node->getTargetOpcode();
    unsigned NumRes = TII->getNumDefs(Opc);
    unsigned NumOps = ScheduleDAG::CountOperands(SU->Node);
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->Node->getOperand(i).Val;
        if (Op == (*SUnitMap)[DU][SU->InstanceNo])
          return true;
      }
    }
  }
  return false;
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule).
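///
/// For example (illustrative): if A = ADD t1, ... is two-address, clobbering
/// t1, and another node B also reads t1, the pseudo edge makes A depend on B.
/// B is then emitted above A in the final order and reads t1 before A
/// clobbers it, so no copy of t1 is needed to preserve it.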
template<class SF>
void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = (SUnit *)&((*SUnits)[i]);
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->Node;
    if (!Node || !Node->isTargetOpcode())
      continue;

    unsigned Opc = Node->getTargetOpcode();
    unsigned NumRes = TII->getNumDefs(Opc);
    unsigned NumOps = ScheduleDAG::CountOperands(Node);
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->Node->getOperand(j).Val;
        SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo];
        if (!DUSU) continue;
        for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
             I != E; ++I) {
          if (I->isCtrl) continue;
          SUnit *SuccSU = I->Dep;
          // Don't constrain nodes with implicit defs. It can create cycles
          // plus it may increase register pressure.
          if (SuccSU == SU || SuccSU->hasImplicitDefs)
            continue;
          // Be conservative. Ignore if nodes aren't at the same depth.
          if (SuccSU->Depth != SU->Depth)
            continue;
          if ((!canClobber(SuccSU, DUSU) ||
               (!SU->isCommutable && SuccSU->isCommutable)) &&
              !isReachable(SuccSU, SU)) {
            DOUT << "Adding an edge from SU # " << SU->NodeNum
                 << " to SU #" << SuccSU->NodeNum << "\n";
            SU->addPred(SuccSU, true, true);
          }
        }
      }
    }
  }
}

/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
/// The smaller the number, the higher the priority.
template<class SF>
unsigned BURegReductionPriorityQueue<SF>::
CalcNodeSethiUllmanNumber(const SUnit *SU) {
  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // ignore chain preds
    SUnit *PredSU = I->Dep;
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  // Leaf nodes get a Sethi-Ullman number of 1.
  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
}

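/// SumOfUnscheduledPredsOfSuccs - Returns the total number of unscheduled
/// predecessors summed over all of SU's successors (so a shared predecessor,
/// including SU itself, may be counted more than once).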
static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    SUnit *SuccSU = I->Dep;
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->Dep;
      if (!PredSU->isScheduled)
        ++Sum;
    }
  }

  return Sum;
}

// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->Node && left->Node->isTargetOpcode();
  bool RIsTarget = right->Node && right->Node->isTargetOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0;
  unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  // Special tie breaker: if two nodes share an operand, the one that uses it
  // as a def&use operand is preferred.
  if (LIsTarget && RIsTarget) {
    if (left->isTwoAddress && !right->isTwoAddress) {
      SDNode *DUNode = left->Node->getOperand(0).Val;
      if (DUNode->isOperand(right->Node))
        RBonus += 2;
    }
    if (!left->isTwoAddress && right->isTwoAddress) {
      SDNode *DUNode = right->Node->getOperand(0).Val;
      if (DUNode->isOperand(left->Node))
        LBonus += 2;
    }
  }
  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus < RPriority+RBonus)
    return true;
  else if (LPriority == RPriority)
    if (left->Depth < right->Depth)
      return true;
    else if (left->Depth == right->Depth)
      if (left->NumSuccsLeft > right->NumSuccsLeft)
        return true;
      else if (left->NumSuccsLeft == right->NumSuccsLeft)
        if (left->CycleBound > right->CycleBound)
          return true;
  return false;
}

/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
/// The smaller the number, the higher the priority.
template<class SF>
unsigned TDRegReductionPriorityQueue<SF>::
CalcNodeSethiUllmanNumber(const SUnit *SU) {
  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    SethiUllmanNumber = 0xffff;
  else if (SU->NumSuccsLeft == 0)
    // If SU does not have a use, i.e. it doesn't produce a value that would
    // be consumed (e.g. store), then it terminates a chain of computation.
    // Give it a small SethiUllman number so it will be scheduled right before
    // its predecessors, and therefore doesn't lengthen their live ranges.
    SethiUllmanNumber = 0;
  else if (SU->NumPredsLeft == 0 &&
           (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
    SethiUllmanNumber = 0xffff;
  else {
    int Extra = 0;
    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl) continue;  // ignore chain preds
      SUnit *PredSU = I->Dep;
      unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
      if (PredSethiUllman > SethiUllmanNumber) {
        SethiUllmanNumber = PredSethiUllman;
        Extra = 0;
      } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
        ++Extra;
    }

    SethiUllmanNumber += Extra;
  }

  return SethiUllmanNumber;
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                                    SelectionDAG *DAG,
                                                    MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo();
  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true,
                           new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII));
}

llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
                                                    SelectionDAG *DAG,
                                                    MachineBasicBlock *BB) {
  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false,
                              new TDRegReductionPriorityQueue<td_ls_rr_sort>());
}