ScheduleDAGRRList.cpp revision 2eb4ebd953314d7c7f96cfb5897d8ded94f6c9d4
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include <climits>
#include <queue>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumCCCopies,   "Number of cross class copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "  Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "  Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false
  /// if it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  ///
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegs / LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes that
  /// modify the registers can be scheduled.
  SmallSet<unsigned, 4> LiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

public:
  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
                    const TargetMachine &tm, bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
      AvailableQueue(availqueue) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

private:
  void ReleasePred(SUnit*, bool, unsigned);
  void ReleaseSucc(SUnit*, bool isChain, unsigned);
  void CapturePred(SUnit*, SUnit*, bool);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
                                  const TargetRegisterClass*,
                                  const TargetRegisterClass*,
                                  SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();
  void CommuteNodesToReducePressure();
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  LiveRegDefs.resize(MRI->getNumRegs(), NULL);
  LiveRegCycles.resize(MRI->getNumRegs(), 0);

  // Build scheduling units.
  BuildSchedUnits();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(&DAG));
  CalculateDepths();
  CalculateHeights();

  AvailableQueue->initNodes(SUnitMap, SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();

  CommuteNodesToReducePressure();

  DOUT << "*** Final schedule ***\n";
  DEBUG(dumpSchedule());
  DOUT << "\n";

  // Emit in scheduled order.
  EmitSchedule();
}

/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
/// it is not the last use of its first operand, add it to the CommuteSet if
/// possible. It will be commuted when it is translated to a MI.
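/// For example, with a hypothetical two-address target instruction ADD:
///   t1 = ADD t2, t3    ; the def t1 is tied to the first operand t2
/// If t2 is still used below the ADD but t3 is not, commuting the operands
/// lets the ADD clobber t3 instead, avoiding a copy to preserve t2.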
void ScheduleDAGRRList::CommuteNodesToReducePressure() {
  SmallPtrSet<SUnit*, 4> OperandSeen;
  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
    SUnit *SU = Sequence[i];
    if (!SU || !SU->Node) continue;
    if (SU->isCommutable) {
      unsigned Opc = SU->Node->getTargetOpcode();
      unsigned NumRes = TII->getNumDefs(Opc);
      unsigned NumOps = CountOperands(SU->Node);
      for (unsigned j = 0; j != NumOps; ++j) {
        if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1)
          continue;

        SDNode *OpN = SU->Node->getOperand(j).Val;
        SUnit *OpSU = SUnitMap[OpN][SU->InstanceNo];
        if (OpSU && OperandSeen.count(OpSU) == 1) {
          // Ok, so SU is not the last use of OpSU, but SU is two-address so
          // it will clobber OpSU. Try to commute SU if no other source operands
          // are live below.
          bool DoCommute = true;
          for (unsigned k = 0; k < NumOps; ++k) {
            if (k != j) {
              OpN = SU->Node->getOperand(k).Val;
              OpSU = SUnitMap[OpN][SU->InstanceNo];
              if (OpSU && OperandSeen.count(OpSU) == 1) {
                DoCommute = false;
                break;
              }
            }
          }
          if (DoCommute)
            CommuteSet.insert(SU->Node);
        }

        // Only look at the first use&def node for now.
        break;
      }
    }

    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (!I->isCtrl)
        OperandSeen.insert(I->Dep);
    }
  }
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);

  --PredSU->NumSuccsLeft;

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    PredSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (PredSU->NumSuccsLeft == 0) {
    // EntryToken has to go last!  Special case it here.
    if (!PredSU->Node || PredSU->Node->getOpcode() != ISD::EntryToken) {
      PredSU->isAvailable = true;
      AvailableQueue->push(PredSU);
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Bottom up: release predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(I->Dep, I->isCtrl, CurCycle);
    if (I->Cost < 0) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (LiveRegs.insert(I->Reg)) {
        LiveRegDefs[I->Reg] = I->Dep;
        LiveRegCycles[I->Reg] = CurCycle;
      }
    }
  }

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      if (LiveRegCycles[I->Reg] == I->Dep->Cycle) {
        LiveRegs.erase(I->Reg);
        assert(LiveRegDefs[I->Reg] == SU &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = NULL;
        LiveRegCycles[I->Reg] = 0;
      }
    }
  }

  SU->isScheduled = true;
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) {
  PredSU->CycleBound = 0;
  for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end();
       I != E; ++I) {
    if (I->Dep == SU)
      continue;
    PredSU->CycleBound = std::max(PredSU->CycleBound,
                                  I->Dep->Cycle + PredSU->Latency);
  }

  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// and its predecessors' state to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DOUT << "*** Unscheduling [" << SU->Cycle << "]: ";
  DEBUG(SU->dump(&DAG));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(I->Dep, SU, I->isCtrl);
    if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) {
      LiveRegs.erase(I->Reg);
      assert(LiveRegDefs[I->Reg] == I->Dep &&
             "Physical register dependency violated?");
      LiveRegDefs[I->Reg] = NULL;
      LiveRegCycles[I->Reg] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      if (LiveRegs.insert(I->Reg)) {
        assert(!LiveRegDefs[I->Reg] &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = SU;
      }
      if (I->Dep->Cycle < LiveRegCycles[I->Reg])
        LiveRegCycles[I->Reg] = I->Dep->Cycle;
    }
  }

  SU->Cycle = 0;
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

// FIXME: This is probably too slow!
static void isReachable(SUnit *SU, SUnit *TargetSU,
                        SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) {
  if (Reached) return;
  if (SU == TargetSU) {
    Reached = true;
    return;
  }
  if (!Visited.insert(SU)) return;

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E;
       ++I)
    isReachable(I->Dep, TargetSU, Visited, Reached);
}

static bool isReachable(SUnit *SU, SUnit *TargetSU) {
  SmallPtrSet<SUnit*, 32> Visited;
  bool Reached = false;
  isReachable(SU, TargetSU, Visited, Reached);
  return Reached;
}
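
// Note: each isReachable query above is a full DFS over the predecessor
// edges, i.e. O(V+E) per call; this cost, multiplied across the calls from
// WillCreateCycle and AddPseudoTwoAddrDeps, is what the FIXME refers to.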

/// WillCreateCycle - Returns true if making TargetSU a predecessor of SU will
/// create a cycle, i.e. if SU (or one of its physical register predecessors)
/// is already reachable from TargetSU via predecessor edges.
static bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
  if (isReachable(TargetSU, SU))
    return true;
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (I->Cost < 0 && isReachable(TargetSU, I->Dep))
      return true;
  return false;
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node, updating CurCycle as nodes
/// are unscheduled. If a successor of SU is unscheduled in the process, SU's
/// isAvailable flag is cleared (SU has already been popped, so it must not be
/// removed from AvailableQueue again); the caller can test the flag to detect
/// this case.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
  }

  if (SU->isSucc(OldSU)) {
    assert(false && "Something is wrong!");
    abort();
  }

  ++NumBacktracks;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  if (SU->FlaggedNodes.size())
    return NULL;

  SDNode *N = SU->Node;
  if (!N)
    return NULL;

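  // There are two ways to make the value available again: if the node also
  // produces a chain result (MVT::Other), try to unfold it back into a load
  // plus a compute node; otherwise fall through and clone the node outright.
  // Nodes that produce or consume a flag cannot safely be duplicated.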
  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT::ValueType VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDOperand &Op = N->getOperand(i);
    MVT::ValueType VT = Op.Val->getValueType(Op.ResNo);
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 4> NewNodes;
    if (!MRI->unfoldMemoryOperand(DAG, N, NewNodes))
      return NULL;

    DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    std::vector<SDNode*> Deleted;
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->Node->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, i),
                                    SDOperand(N, i), Deleted);
    DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, OldNumVals-1),
                                  SDOperand(LoadNode, 1), Deleted);

    SUnit *LoadSU = NewSUnit(LoadNode);
    NewSU = NewSUnit(N);
    SUnitMap[LoadNode].push_back(LoadSU);
    SUnitMap[N].push_back(NewSU);
    const TargetInstrDescriptor *TID = &TII->get(LoadNode->getTargetOpcode());
    for (unsigned i = 0; i != TID->numOperands; ++i) {
      if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) {
        LoadSU->isTwoAddress = true;
        break;
      }
    }
    if (TID->Flags & M_COMMUTABLE)
      LoadSU->isCommutable = true;

    TID = &TII->get(N->getTargetOpcode());
    for (unsigned i = 0; i != TID->numOperands; ++i) {
      if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID->Flags & M_COMMUTABLE)
      NewSU->isCommutable = true;

    // FIXME: Calculate height / depth and propagate the changes?
    LoadSU->Depth = NewSU->Depth = SU->Depth;
    LoadSU->Height = NewSU->Height = SU->Height;
    ComputeLatency(LoadSU);
    ComputeLatency(NewSU);

    SUnit *ChainPred = NULL;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl)
        ChainPred = I->Dep;
      else if (I->Dep->Node && I->Dep->Node->isOperand(LoadNode))
        LoadPreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false));
      else
        NodePreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false));
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl)
        ChainSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost,
                                  I->isCtrl, I->isSpecial));
      else
        NodeSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost,
                                 I->isCtrl, I->isSpecial));
    }

    SU->removePred(ChainPred, true, false);
    LoadSU->addPred(ChainPred, true, false);
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      SDep *Pred = &LoadPreds[i];
      SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial);
      LoadSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial,
                      Pred->Reg, Pred->Cost);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      SDep *Pred = &NodePreds[i];
      SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial);
      NewSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial,
                     Pred->Reg, Pred->Cost);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep *Succ = &NodeSuccs[i];
      Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial);
      Succ->Dep->addPred(NewSU, Succ->isCtrl, Succ->isSpecial,
                         Succ->Reg, Succ->Cost);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep *Succ = &ChainSuccs[i];
      Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial);
      Succ->Dep->addPred(LoadSU, Succ->isCtrl, Succ->isSpecial,
                         Succ->Reg, Succ->Cost);
    }
    NewSU->addPred(LoadSU, false, false);

    AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    } else
      SU = NewSU;
  }

  DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
  NewSU = Clone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isSpecial) {
      NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost);
      NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1);
    }

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit*, bool>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isSpecial)
      continue;
    if (I->Dep->isScheduled) {
      NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1);
      I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost);
      DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    SUnit *Succ = DelDeps[i].first;
    bool isCtrl = DelDeps[i].second;
    Succ->removePred(SU, isCtrl, false);
  }

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
/// and move all scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = NewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;
  CopyFromSU->Depth = SU->Depth;
  CopyFromSU->Height = SU->Height;

  SUnit *CopyToSU = NewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit*, bool>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isSpecial)
      continue;
    if (I->Dep->isScheduled) {
      CopyToSU->Height = std::max(CopyToSU->Height, I->Dep->Height+1);
      I->Dep->addPred(CopyToSU, I->isCtrl, false, I->Reg, I->Cost);
      DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    SUnit *Succ = DelDeps[i].first;
    bool isCtrl = DelDeps[i].second;
    Succ->removePred(SU, isCtrl, false);
  }
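
  // CopyFromSU copies the value out of the restricted register class and
  // CopyToSU copies it back for the successors rewired above; the cost of -1
  // on the edge from SU marks it as a physical register dependency that must
  // not be clobbered.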
  CopyFromSU->addPred(SU, false, false, Reg, -1);
  CopyToSU->addPred(CopyFromSU, false, false, Reg, 1);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumCCCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT::ValueType getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                            const TargetInstrInfo *TII) {
  const TargetInstrDescriptor &TID = TII->get(N->getTargetOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.numDefs;
  for (const unsigned *ImpDef = TID.ImplicitDefs; *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register
/// dependencies; the interfering registers are returned in LRegs. It is then
/// up to the caller to resolve the interference (by backtracking, cloning,
/// or inserting cross class copies) so the node can eventually be scheduled.
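/// (The registers tracked this way are those that are impossible or expensive
/// to copy; the canonical example is a condition code register such as X86's
/// EFLAGS.)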
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs) {
  if (LiveRegs.empty())
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      unsigned Reg = I->Reg;
      if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep) {
        if (RegAdded.insert(Reg))
          LRegs.push_back(Reg);
      }
      for (const unsigned *Alias = MRI->getAliasSet(Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }

  for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) {
    SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1];
    if (!Node || !Node->isTargetOpcode())
      continue;
    const TargetInstrDescriptor &TID = TII->get(Node->getTargetOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
      if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU) {
        if (RegAdded.insert(*Reg))
          LRegs.push_back(*Reg);
      }
      for (const unsigned *Alias = MRI->getAliasSet(*Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
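/// Each iteration pops the highest priority node. A node whose CycleBound has
/// not been reached yet, or that would clobber a live physical register, is
/// set aside as pending. If every candidate ends up delayed by a live
/// register, the interference is resolved by backtracking, duplicating the
/// defining node, or inserting cross register class copies.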
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;
  // Add root to Available queue.
  SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front();
  RootSU->isAvailable = true;
  AvailableQueue->push(RootSU);

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      if (CurSU->CycleBound <= CurCycle) {
        SmallVector<unsigned, 4> LRegs;
        if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
          break;
        Delayed = true;
        LRegsMap.insert(std::make_pair(CurSU, LRegs));
      }

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          TrySU->addPred(OldSU, true, true);
          // If one or more successors have been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. Try duplicating the nodes that produce these
        // "expensive to copy" values to break the dependency. In case even
        // that doesn't work, insert cross class copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
        if (!NewDef) {
          // Issue expensive cross register class copies.
          MVT::ValueType VT = getPhysicalRegisterVT(LRDef->Node, Reg, TII);
          const TargetRegisterClass *RC =
            MRI->getPhysicalRegisterRegClass(VT, Reg);
          const TargetRegisterClass *DestRC = MRI->getCrossCopyRegClass(RC);
          if (!DestRC) {
            assert(false && "Don't know how to copy this physical register!");
            abort();
          }
          SmallVector<SUnit*, 2> Copies;
          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DOUT << "Adding an edge from SU # " << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n";
          TrySU->addPred(Copies.front(), true, true);
          NewDef = Copies.back();
        }

        DOUT << "Adding an edge from SU # " << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n";
        LiveRegDefs[Reg] = NewDef;
        NewDef->addPred(TrySU, true, true);
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      if (!CurSU) {
        assert(false &&
               "Unable to resolve live physical register dependencies!");
        abort();
      }
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeBottomUp(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    ++CurCycle;
  }

  // Add entry node last.
  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
    Sequence.push_back(Entry);
  }

  // Reverse the order since it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (SUnits[i].NumSuccsLeft != 0) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);

  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    SuccSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (SuccSU->NumPredsLeft == 0) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Top down: release successors.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(I->Dep, I->isCtrl, CurCycle);
  SU->isScheduled = true;
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
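/// The top-down loop is simpler than the bottom-up one: there is no live
/// physical register tracking, so a popped node is deferred only when its
/// CycleBound has not been reached yet.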
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // Emit the entry node first.
  ScheduleNodeTopDown(Entry, CurCycle);
  Sequence.push_back(Entry);
  ++CurCycle;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  std::vector<SUnit*> NotReady;
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU && CurSU->CycleBound > CurCycle) {
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // Add the nodes that aren't ready back onto the available list.
    AvailableQueue->push_all(NotReady);
    NotReady.clear();

    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeTopDown(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    ++CurCycle;
  }

#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (!SUnits[i].isScheduled) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}

//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

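/// isCopyFromLiveIn - Return true if SU is a CopyFromReg node that reads a
/// function live-in rather than a value defined earlier in the block; such a
/// copy is not glued to a defining node, so its last operand is not a flag.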
static inline bool isCopyFromLiveIn(const SUnit *SU) {
  SDNode *N = SU->Node;
  return N && N->getOpcode() == ISD::CopyFromReg &&
    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
}

namespace {
  template<class SF>
  class VISIBILITY_HIDDEN RegReductionPriorityQueue
   : public SchedulingPriorityQueue {
    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;

  public:
    RegReductionPriorityQueue() :
      Queue(SF(this)) {}

    virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                           std::vector<SUnit> &sunits) {}

    virtual void addNode(const SUnit *SU) {}

    virtual void updateNode(const SUnit *SU) {}

    virtual void releaseState() {}

    virtual unsigned getNodePriority(const SUnit *SU) const {
      return 0;
    }

    unsigned size() const { return Queue.size(); }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      Queue.push(U);
    }
    void push_all(const std::vector<SUnit *> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        Queue.push(Nodes[i]);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      return V;
    }

    /// remove - This is a really inefficient way to remove a node from a
    /// priority queue.  We should roll our own heap to make this better or
    /// something.
    void remove(SUnit *SU) {
      std::vector<SUnit*> Temp;

      assert(!Queue.empty() && "Not in queue!");
      while (Queue.top() != SU) {
        Temp.push_back(Queue.top());
        Queue.pop();
        assert(!Queue.empty() && "Not in queue!");
      }

      // Remove the node from the PQ.
      Queue.pop();

      // Add all the other nodes back.
      for (unsigned i = 0, e = Temp.size(); i != e; ++i)
        Queue.push(Temp[i]);
    }
  };

  template<class SF>
  class VISIBILITY_HIDDEN BURegReductionPriorityQueue
   : public RegReductionPriorityQueue<SF> {
    // SUnitMap - SDNode to SUnit mapping (n -> n).
    DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap;

    // SUnits - The SUnits for the current graph.
    const std::vector<SUnit> *SUnits;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

    const TargetInstrInfo *TII;
  public:
    explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii)
      : TII(tii) {}

    void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                   std::vector<SUnit> &sunits) {
      SUnitMap = &sumap;
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      SethiUllmanNumbers.resize(SUnits->size(), 0);
      CalcNodeSethiUllmanNumber(SU);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0;
      if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
        // CopyFromReg should be close to its def because it restricts
        // allocation choices. But if it is a livein then perhaps we want it
        // closer to its uses so it can be coalesced.
        return 0xffff;
      else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      else if (SU->NumSuccs == 0)
        // If SU does not have a use, i.e. it doesn't produce a value that
        // would be consumed (e.g. store), then it terminates a chain of
        // computation. Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors and not lengthen their
        // live ranges.
        return 0xffff;
      else if (SU->NumPreds == 0)
        // If SU does not have a def, schedule it close to its uses because it
        // does not lengthen any live ranges.
        return 0;
      else
        return SethiUllmanNumbers[SU->NodeNum];
    }

  private:
    bool canClobber(SUnit *SU, SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void CalculateSethiUllmanNumbers();
    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
  };

  template<class SF>
  class VISIBILITY_HIDDEN TDRegReductionPriorityQueue
   : public RegReductionPriorityQueue<SF> {
    // SUnitMap - SDNode to SUnit mapping (n -> n).
    DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap;

    // SUnits - The SUnits for the current graph.
    const std::vector<SUnit> *SUnits;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    TDRegReductionPriorityQueue() {}

    void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                   std::vector<SUnit> &sunits) {
      SUnitMap = &sumap;
      SUnits = &sunits;
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      SethiUllmanNumbers.resize(SUnits->size(), 0);
      CalcNodeSethiUllmanNumber(SU);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      return SethiUllmanNumbers[SU->NodeNum];
    }

  private:
    void CalculateSethiUllmanNumbers();
    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
  };
}  // end anonymous namespace

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxCycle = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned Cycle = I->Dep->Cycle;
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->Dep->Node && I->Dep->Node->getOpcode() == ISD::CopyToReg)
      Cycle = closestSucc(I->Dep)+1;
    if (Cycle > MaxCycle)
      MaxCycle = Cycle;
  }
  return MaxCycle;
}

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  // There used to be a special tie breaker here that looked for
  // two-address instructions and preferred the instruction with a
  // def&use operand.  The special case triggered diagnostics when
  // _GLIBCXX_DEBUG was enabled because it broke the strict weak
  // ordering that priority_queue requires. It didn't help much anyway
  // because AddPseudoTwoAddrDeps already covers many of the cases
  // where it would have applied.  In addition, it's counter-intuitive
  // that a tie breaker would be the first thing attempted.  There's a
  // "real" tie breaker below that is the operation of last resort.
  // The fact that the "special tie breaker" would trigger when there
  // wasn't otherwise a tie is what broke the strict weak ordering
  // constraint.

  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority > RPriority)
    return true;
  else if (LPriority == RPriority) {
    // Try to schedule def + use closer together when Sethi-Ullman numbers
    // are the same, e.g.
    // t1 = op t2, c1
    // t3 = op t4, c2
    //
    // and the following instructions are both ready.
    // t2 = op c3
    // t4 = op c4
    //
    // Then schedule t2 = op first.
    // i.e.
    // t4 = op c4
    // t2 = op c3
    // t1 = op t2, c1
    // t3 = op t4, c2
    //
    // This creates more short live intervals.
    unsigned LDist = closestSucc(left);
    unsigned RDist = closestSucc(right);
    if (LDist < RDist)
      return true;
    else if (LDist == RDist) {
      if (left->Height > right->Height)
        return true;
      else if (left->Height == right->Height)
        if (left->Depth < right->Depth)
          return true;
        else if (left->Depth == right->Depth)
          if (left->CycleBound > right->CycleBound)
            return true;
    }
  }
  return false;
}

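/// canClobber - Return true if SU is a two-address instruction that uses the
/// value produced by Op as its tied def&use operand, i.e. scheduling SU will
/// clobber the register holding Op's result.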
template<class SF>
bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->Node->getTargetOpcode();
    unsigned NumRes = TII->getNumDefs(Opc);
    unsigned NumOps = ScheduleDAG::CountOperands(SU->Node);
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->Node->getOperand(i).Val;
        if (Op == (*SUnitMap)[DU][SU->InstanceNo])
          return true;
      }
    }
  }
  return false;
}

/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl) continue;
    SUnit *SuccSU = I->Dep;
    if (SuccSU->Node && SuccSU->Node->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = (SUnit *)&((*SUnits)[i]);
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->Node;
    if (!Node || !Node->isTargetOpcode() || SU->FlaggedNodes.size() > 0)
      continue;

    unsigned Opc = Node->getTargetOpcode();
    unsigned NumRes = TII->getNumDefs(Opc);
    unsigned NumOps = ScheduleDAG::CountOperands(Node);
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->Node->getOperand(j).Val;
        SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo];
        if (!DUSU) continue;
        for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
             I != E; ++I) {
          if (I->isCtrl) continue;
          SUnit *SuccSU = I->Dep;
          // Don't constrain nodes with implicit defs. It can create cycles
          // plus it may increase register pressure.
          if (SuccSU == SU || SuccSU->hasPhysRegDefs)
            continue;
          // Be conservative. Ignore if nodes aren't at the same depth.
          if (SuccSU->Depth != SU->Depth)
            continue;
          if ((!canClobber(SuccSU, DUSU) ||
               (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
               (!SU->isCommutable && SuccSU->isCommutable)) &&
              !isReachable(SuccSU, SU)) {
            DOUT << "Adding an edge from SU # " << SU->NodeNum
                 << " to SU #" << SuccSU->NodeNum << "\n";
            SU->addPred(SuccSU, true, true);
          }
        }
      }
    }
  }
}

/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
/// Smaller number is the higher priority.
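/// This follows the classic Sethi-Ullman (Ershov) numbering: a leaf gets 1,
/// and an interior node gets the maximum of its operands' numbers, plus one
/// for each additional operand tying that maximum (the Extra bump below).
/// E.g. for (a+b)*(c+d), the leaves get 1, each add gets 2, and the multiply
/// gets 3, which is the number of registers needed to evaluate the subtree.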
template<class SF>
unsigned BURegReductionPriorityQueue<SF>::
CalcNodeSethiUllmanNumber(const SUnit *SU) {
  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // ignore chain preds
    SUnit *PredSU = I->Dep;
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
}

/// SumOfUnscheduledPredsOfSuccs - Returns the total number of unscheduled
/// predecessors of SU's successors.
static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    SUnit *SuccSU = I->Dep;
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->Dep;
      if (!PredSU->isScheduled)
        ++Sum;
    }
  }

  return Sum;
}

// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->Node && left->Node->isTargetOpcode();
  bool RIsTarget = right->Node && right->Node->isTargetOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0;
  unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  // Special tie breaker: if two nodes share an operand, the one that uses it
  // as a def&use operand is preferred.
  if (LIsTarget && RIsTarget) {
    if (left->isTwoAddress && !right->isTwoAddress) {
      SDNode *DUNode = left->Node->getOperand(0).Val;
      if (DUNode->isOperand(right->Node))
        RBonus += 2;
    }
    if (!left->isTwoAddress && right->isTwoAddress) {
      SDNode *DUNode = right->Node->getOperand(0).Val;
      if (DUNode->isOperand(left->Node))
        LBonus += 2;
    }
  }
  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus < RPriority+RBonus)
    return true;
  else if (LPriority == RPriority)
    if (left->Depth < right->Depth)
      return true;
    else if (left->Depth == right->Depth)
      if (left->NumSuccsLeft > right->NumSuccsLeft)
        return true;
      else if (left->NumSuccsLeft == right->NumSuccsLeft)
        if (left->CycleBound > right->CycleBound)
          return true;
  return false;
}

/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
/// Smaller number is the higher priority.
template<class SF>
unsigned TDRegReductionPriorityQueue<SF>::
CalcNodeSethiUllmanNumber(const SUnit *SU) {
  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    SethiUllmanNumber = 0xffff;
  else if (SU->NumSuccsLeft == 0)
    // If SU does not have a use, i.e. it doesn't produce a value that would
    // be consumed (e.g. store), then it terminates a chain of computation.
    // Give it a small SethiUllman number so it will be scheduled right before
    // its predecessors and not lengthen their live ranges.
    SethiUllmanNumber = 0;
  else if (SU->NumPredsLeft == 0 &&
           (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
    SethiUllmanNumber = 0xffff;
  else {
    int Extra = 0;
    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl) continue;  // ignore chain preds
      SUnit *PredSU = I->Dep;
      unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
      if (PredSethiUllman > SethiUllmanNumber) {
        SethiUllmanNumber = PredSethiUllman;
        Extra = 0;
      } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
        ++Extra;
    }

    SethiUllmanNumber += Extra;
  }

  return SethiUllmanNumber;
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                                    SelectionDAG *DAG,
                                                    MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo();
  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true,
                           new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII));
}

llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
                                                    SelectionDAG *DAG,
                                                    MachineBasicBlock *BB) {
  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false,
                              new TDRegReductionPriorityQueue<td_ls_rr_sort>());
}