ScheduleDAGRRList.cpp revision 7d2325fdbbbfe925bbaebfbcca6faae7619f3d02
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/Target/TargetRegisterInfo.h"
24#include "llvm/Target/TargetData.h"
25#include "llvm/Target/TargetMachine.h"
26#include "llvm/Target/TargetInstrInfo.h"
27#include "llvm/ADT/SmallSet.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/Support/raw_ostream.h"
33#include <climits>
34using namespace llvm;
35
36STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
37STATISTIC(NumUnfolds,    "Number of nodes unfolded");
38STATISTIC(NumDups,       "Number of duplicated nodes");
39STATISTIC(NumPRCopies,   "Number of physical register copies");
40
41static RegisterScheduler
42  burrListDAGScheduler("list-burr",
43                       "Bottom-up register reduction list scheduling",
44                       createBURRListDAGScheduler);
45static RegisterScheduler
46  tdrListrDAGScheduler("list-tdrr",
47                       "Top-down register reduction list scheduling",
48                       createTDRRListDAGScheduler);
49static RegisterScheduler
50  sourceListDAGScheduler("source",
51                         "Similar to list-burr but schedules in source "
52                         "order when possible",
53                         createSourceListDAGScheduler);
54
55static RegisterScheduler
56  hybridListDAGScheduler("list-hybrid",
57                         "Bottom-up rr list scheduling which avoids stalls for "
58                         "long latency instructions",
59                         createHybridListDAGScheduler);
60
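// For reference, these registrations make the schedulers selectable by name
// through the SchedulerRegistry. Assuming the standard -pre-RA-sched option
// wired up in SelectionDAGISel, a particular one can be requested with, e.g.:
//
//   llc -pre-RA-sched=list-burr foo.ll
//   llc -pre-RA-sched=source foo.ll
//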
61namespace {
62//===----------------------------------------------------------------------===//
63/// ScheduleDAGRRList - The actual register reduction list scheduler
64/// implementation.  This supports both top-down and bottom-up scheduling.
65///
66class ScheduleDAGRRList : public ScheduleDAGSDNodes {
67private:
68  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
69  /// it is top-down.
70  bool isBottomUp;
71
72  /// NeedLatency - True if the scheduler will make use of latency information.
73  ///
74  bool NeedLatency;
75
76  /// AvailableQueue - The priority queue to use for the available SUnits.
77  SchedulingPriorityQueue *AvailableQueue;
78
79  /// LiveRegDefs - A map from each "live" physical register to the SUnit
80  /// that defines it. These defining nodes must be scheduled before any
81  /// other node that modifies the register can be scheduled.
82  unsigned NumLiveRegs;
83  std::vector<SUnit*> LiveRegDefs;
84  std::vector<unsigned> LiveRegCycles;
85
86  /// Topo - A topological ordering for SUnits which permits fast IsReachable
87  /// and similar queries.
88  ScheduleDAGTopologicalSort Topo;
89
90public:
91  ScheduleDAGRRList(MachineFunction &mf,
92                    bool isbottomup, bool needlatency,
93                    SchedulingPriorityQueue *availqueue)
94    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
95      AvailableQueue(availqueue), Topo(SUnits) {
96    }
97
98  ~ScheduleDAGRRList() {
99    delete AvailableQueue;
100  }
101
102  void Schedule();
103
104  /// IsReachable - Checks if SU is reachable from TargetSU.
105  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
106    return Topo.IsReachable(SU, TargetSU);
107  }
108
109  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
110  /// create a cycle.
111  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
112    return Topo.WillCreateCycle(SU, TargetSU);
113  }
114
115  /// AddPred - adds a predecessor edge to SUnit SU.
116  /// Updates the topological ordering if required.
117  /// The edge is registered with Topo before being added to the SUnit.
118  void AddPred(SUnit *SU, const SDep &D) {
119    Topo.AddPred(SU, D.getSUnit());
120    SU->addPred(D);
121  }
122
123  /// RemovePred - removes a predecessor edge from SUnit SU.
124  /// Updates the topological ordering if required.
125  /// The edge is removed from Topo before being removed from the SUnit.
126  void RemovePred(SUnit *SU, const SDep &D) {
127    Topo.RemovePred(SU, D.getSUnit());
128    SU->removePred(D);
129  }
130
131private:
132  void ReleasePred(SUnit *SU, const SDep *PredEdge);
133  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
134  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
135  void ReleaseSuccessors(SUnit *SU);
136  void CapturePred(SDep *PredEdge);
137  void ScheduleNodeBottomUp(SUnit*, unsigned);
138  void ScheduleNodeTopDown(SUnit*, unsigned);
139  void UnscheduleNodeBottomUp(SUnit*);
140  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
141  SUnit *CopyAndMoveSuccessors(SUnit*);
142  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
143                                const TargetRegisterClass*,
144                                const TargetRegisterClass*,
145                                SmallVector<SUnit*, 2>&);
146  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
147  void ListScheduleTopDown();
148  void ListScheduleBottomUp();
149
150
151  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
152  /// Updates the topological ordering if required.
153  SUnit *CreateNewSUnit(SDNode *N) {
154    unsigned NumSUnits = SUnits.size();
155    SUnit *NewNode = NewSUnit(N);
156    // Update the topological ordering.
157    if (NewNode->NodeNum >= NumSUnits)
158      Topo.InitDAGTopologicalSorting();
159    return NewNode;
160  }
161
162  /// CreateClone - Creates a new SUnit from an existing one.
163  /// Updates the topological ordering if required.
164  SUnit *CreateClone(SUnit *N) {
165    unsigned NumSUnits = SUnits.size();
166    SUnit *NewNode = Clone(N);
167    // Update the topological ordering.
168    if (NewNode->NodeNum >= NumSUnits)
169      Topo.InitDAGTopologicalSorting();
170    return NewNode;
171  }
172
173  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
174  /// need actual latency information but the hybrid scheduler does.
175  bool ForceUnitLatencies() const {
176    return !NeedLatency;
177  }
178};
179}  // end anonymous namespace
180
181
182/// Schedule - Schedule the DAG using list scheduling.
183void ScheduleDAGRRList::Schedule() {
184  DEBUG(dbgs() << "********** List Scheduling **********\n");
185
186  NumLiveRegs = 0;
187  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
188  LiveRegCycles.resize(TRI->getNumRegs(), 0);
189
190  // Build the scheduling graph.
191  BuildSchedGraph(NULL);
192
193  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
194          SUnits[su].dumpAll(this));
195  Topo.InitDAGTopologicalSorting();
196
197  AvailableQueue->initNodes(SUnits);
198
199  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
200  if (isBottomUp)
201    ListScheduleBottomUp();
202  else
203    ListScheduleTopDown();
204
205  AvailableQueue->releaseState();
206}
207
208//===----------------------------------------------------------------------===//
209//  Bottom-Up Scheduling
210//===----------------------------------------------------------------------===//
211
212/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
213/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
214void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
215  SUnit *PredSU = PredEdge->getSUnit();
216
217#ifndef NDEBUG
218  if (PredSU->NumSuccsLeft == 0) {
219    dbgs() << "*** Scheduling failed! ***\n";
220    PredSU->dump(this);
221    dbgs() << " has been released too many times!\n";
222    llvm_unreachable(0);
223  }
224#endif
225  --PredSU->NumSuccsLeft;
226
227  if (!ForceUnitLatencies()) {
228    // Update the predecessor's height. This is now the cycle when the
229    // predecessor can be scheduled without causing a pipeline stall.
230    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
231  }
232
233  // If all the node's successors are scheduled, this node is ready
234  // to be scheduled. Ignore the special EntrySU node.
235  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
236    PredSU->isAvailable = true;
237    AvailableQueue->push(PredSU);
238  }
239}
240
241void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
242  // Bottom up: release predecessors
243  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
244       I != E; ++I) {
245    ReleasePred(SU, &*I);
246    if (I->isAssignedRegDep()) {
247      // This is a physical register dependency and it's impossible or
248      // expensive to copy the register. Make sure nothing that can
249      // clobber the register is scheduled between the predecessor and
250      // this node.
251      if (!LiveRegDefs[I->getReg()]) {
252        ++NumLiveRegs;
253        LiveRegDefs[I->getReg()] = I->getSUnit();
254        LiveRegCycles[I->getReg()] = CurCycle;
255      }
256    }
257  }
258}
259
260/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
261/// count of its predecessors. If a predecessor's pending count reaches zero, add it to
262/// the Available queue.
263void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
264  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
265  DEBUG(SU->dump(this));
266
267#ifndef NDEBUG
268  if (CurCycle < SU->getHeight())
269    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
270#endif
271
272  // FIXME: Handle noop hazard.
273  SU->setHeightToAtLeast(CurCycle);
274  Sequence.push_back(SU);
275
276  ReleasePredecessors(SU, CurCycle);
277
278  // Release all the implicit physical register defs that are live.
279  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
280       I != E; ++I) {
281    if (I->isAssignedRegDep()) {
282      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
283        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
284        assert(LiveRegDefs[I->getReg()] == SU &&
285               "Physical register dependency violated?");
286        --NumLiveRegs;
287        LiveRegDefs[I->getReg()] = NULL;
288        LiveRegCycles[I->getReg()] = 0;
289      }
290    }
291  }
292
293  SU->isScheduled = true;
294  AvailableQueue->ScheduledNode(SU);
295}
296
297/// CapturePred - This does the opposite of ReleasePred. Since SU is being
298/// unscheduled, increase the succ left count of its predecessors. Remove
299/// them from AvailableQueue if necessary.
300void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
301  SUnit *PredSU = PredEdge->getSUnit();
302  if (PredSU->isAvailable) {
303    PredSU->isAvailable = false;
304    if (!PredSU->isPending)
305      AvailableQueue->remove(PredSU);
306  }
307
308  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
309  ++PredSU->NumSuccsLeft;
310}
311
312/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
313/// state and its predecessors' states to reflect the change.
314void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
315  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
316  DEBUG(SU->dump(this));
317
318  AvailableQueue->UnscheduledNode(SU);
319
320  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
321       I != E; ++I) {
322    CapturePred(&*I);
323    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
324      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
325      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
326             "Physical register dependency violated?");
327      --NumLiveRegs;
328      LiveRegDefs[I->getReg()] = NULL;
329      LiveRegCycles[I->getReg()] = 0;
330    }
331  }
332
333  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
334       I != E; ++I) {
335    if (I->isAssignedRegDep()) {
336      if (!LiveRegDefs[I->getReg()]) {
337        LiveRegDefs[I->getReg()] = SU;
338        ++NumLiveRegs;
339      }
340      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
341        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
342    }
343  }
344
345  SU->setHeightDirty();
346  SU->isScheduled = false;
347  SU->isAvailable = true;
348  AvailableQueue->push(SU);
349}
350
351/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
352/// BtCycle in order to schedule a specific node.
353void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
354                                          unsigned &CurCycle) {
355  SUnit *OldSU = NULL;
356  while (CurCycle > BtCycle) {
357    OldSU = Sequence.back();
358    Sequence.pop_back();
359    if (SU->isSucc(OldSU))
360      // Don't try to remove SU from AvailableQueue.
361      SU->isAvailable = false;
362    UnscheduleNodeBottomUp(OldSU);
363    --CurCycle;
364    AvailableQueue->setCurCycle(CurCycle);
365  }
366
367  assert(!SU->isSucc(OldSU) && "Something is wrong!");
368
369  ++NumBacktracks;
370}
371
372static bool isOperandOf(const SUnit *SU, SDNode *N) {
373  for (const SDNode *SUNode = SU->getNode(); SUNode;
374       SUNode = SUNode->getFlaggedNode()) {
375    if (SUNode->isOperandOf(N))
376      return true;
377  }
378  return false;
379}
380
381/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
382/// successors to the newly created node.
383SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
384  if (SU->getNode()->getFlaggedNode())
385    return NULL;
386
387  SDNode *N = SU->getNode();
388  if (!N)
389    return NULL;
390
391  SUnit *NewSU;
392  bool TryUnfold = false;
393  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
394    EVT VT = N->getValueType(i);
395    if (VT == MVT::Flag)
396      return NULL;
397    else if (VT == MVT::Other)
398      TryUnfold = true;
399  }
400  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
401    const SDValue &Op = N->getOperand(i);
402    EVT VT = Op.getNode()->getValueType(Op.getResNo());
403    if (VT == MVT::Flag)
404      return NULL;
405  }
406
407  if (TryUnfold) {
408    SmallVector<SDNode*, 2> NewNodes;
409    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
410      return NULL;
411
412    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
413    assert(NewNodes.size() == 2 && "Expected a load folding node!");
414
415    N = NewNodes[1];
416    SDNode *LoadNode = NewNodes[0];
417    unsigned NumVals = N->getNumValues();
418    unsigned OldNumVals = SU->getNode()->getNumValues();
419    for (unsigned i = 0; i != NumVals; ++i)
420      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
421    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
422                                   SDValue(LoadNode, 1));
423
424    // LoadNode may already exist. This can happen when there is another
425    // load from the same location that produces the same type of value
426    // but has different alignment or volatility.
427    bool isNewLoad = true;
428    SUnit *LoadSU;
429    if (LoadNode->getNodeId() != -1) {
430      LoadSU = &SUnits[LoadNode->getNodeId()];
431      isNewLoad = false;
432    } else {
433      LoadSU = CreateNewSUnit(LoadNode);
434      LoadNode->setNodeId(LoadSU->NodeNum);
435      ComputeLatency(LoadSU);
436    }
437
438    NewSU = CreateNewSUnit(N);
439    assert(N->getNodeId() == -1 && "Node already inserted!");
440    N->setNodeId(NewSU->NodeNum);
441
442    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
443    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
444      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
445        NewSU->isTwoAddress = true;
446        break;
447      }
448    }
449    if (TID.isCommutable())
450      NewSU->isCommutable = true;
451    ComputeLatency(NewSU);
452
453    // Record all the edges to and from the old SU, by category.
454    SmallVector<SDep, 4> ChainPreds;
455    SmallVector<SDep, 4> ChainSuccs;
456    SmallVector<SDep, 4> LoadPreds;
457    SmallVector<SDep, 4> NodePreds;
458    SmallVector<SDep, 4> NodeSuccs;
459    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
460         I != E; ++I) {
461      if (I->isCtrl())
462        ChainPreds.push_back(*I);
463      else if (isOperandOf(I->getSUnit(), LoadNode))
464        LoadPreds.push_back(*I);
465      else
466        NodePreds.push_back(*I);
467    }
468    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
469         I != E; ++I) {
470      if (I->isCtrl())
471        ChainSuccs.push_back(*I);
472      else
473        NodeSuccs.push_back(*I);
474    }
475
476    // Now assign edges to the newly-created nodes.
477    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
478      const SDep &Pred = ChainPreds[i];
479      RemovePred(SU, Pred);
480      if (isNewLoad)
481        AddPred(LoadSU, Pred);
482    }
483    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
484      const SDep &Pred = LoadPreds[i];
485      RemovePred(SU, Pred);
486      if (isNewLoad)
487        AddPred(LoadSU, Pred);
488    }
489    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
490      const SDep &Pred = NodePreds[i];
491      RemovePred(SU, Pred);
492      AddPred(NewSU, Pred);
493    }
494    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
495      SDep D = NodeSuccs[i];
496      SUnit *SuccDep = D.getSUnit();
497      D.setSUnit(SU);
498      RemovePred(SuccDep, D);
499      D.setSUnit(NewSU);
500      AddPred(SuccDep, D);
501    }
502    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
503      SDep D = ChainSuccs[i];
504      SUnit *SuccDep = D.getSUnit();
505      D.setSUnit(SU);
506      RemovePred(SuccDep, D);
507      if (isNewLoad) {
508        D.setSUnit(LoadSU);
509        AddPred(SuccDep, D);
510      }
511    }
512
513    // Add a data dependency to reflect that NewSU reads the value defined
514    // by LoadSU.
515    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
516
517    if (isNewLoad)
518      AvailableQueue->addNode(LoadSU);
519    AvailableQueue->addNode(NewSU);
520
521    ++NumUnfolds;
522
523    if (NewSU->NumSuccsLeft == 0) {
524      NewSU->isAvailable = true;
525      return NewSU;
526    }
527    SU = NewSU;
528  }
529
530  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
531  NewSU = CreateClone(SU);
532
533  // New SUnit has the exact same predecessors.
534  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
535       I != E; ++I)
536    if (!I->isArtificial())
537      AddPred(NewSU, *I);
538
539  // Only copy scheduled successors. Cut them from old node's successor
540  // list and move them over.
541  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
542  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
543       I != E; ++I) {
544    if (I->isArtificial())
545      continue;
546    SUnit *SuccSU = I->getSUnit();
547    if (SuccSU->isScheduled) {
548      SDep D = *I;
549      D.setSUnit(NewSU);
550      AddPred(SuccSU, D);
551      D.setSUnit(SU);
552      DelDeps.push_back(std::make_pair(SuccSU, D));
553    }
554  }
555  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
556    RemovePred(DelDeps[i].first, DelDeps[i].second);
557
558  AvailableQueue->updateNode(SU);
559  AvailableQueue->addNode(NewSU);
560
561  ++NumDups;
562  return NewSU;
563}
564
565/// InsertCopiesAndMoveSuccs - Insert register copies and move all
566/// scheduled successors of the given SUnit to the last copy.
567void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
568                                               const TargetRegisterClass *DestRC,
569                                               const TargetRegisterClass *SrcRC,
570                                               SmallVector<SUnit*, 2> &Copies) {
571  SUnit *CopyFromSU = CreateNewSUnit(NULL);
572  CopyFromSU->CopySrcRC = SrcRC;
573  CopyFromSU->CopyDstRC = DestRC;
574
575  SUnit *CopyToSU = CreateNewSUnit(NULL);
576  CopyToSU->CopySrcRC = DestRC;
577  CopyToSU->CopyDstRC = SrcRC;
578
579  // Only copy scheduled successors. Cut them from old node's successor
580  // list and move them over.
581  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
582  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
583       I != E; ++I) {
584    if (I->isArtificial())
585      continue;
586    SUnit *SuccSU = I->getSUnit();
587    if (SuccSU->isScheduled) {
588      SDep D = *I;
589      D.setSUnit(CopyToSU);
590      AddPred(SuccSU, D);
591      DelDeps.push_back(std::make_pair(SuccSU, *I));
592    }
593  }
594  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
595    RemovePred(DelDeps[i].first, DelDeps[i].second);
596
597  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
598  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
599
600  AvailableQueue->updateNode(SU);
601  AvailableQueue->addNode(CopyFromSU);
602  AvailableQueue->addNode(CopyToSU);
603  Copies.push_back(CopyFromSU);
604  Copies.push_back(CopyToSU);
605
606  ++NumPRCopies;
607}
608
609/// getPhysicalRegisterVT - Returns the ValueType of the physical register
610/// definition of the specified node.
611/// FIXME: Move to SelectionDAG?
612static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
613                                 const TargetInstrInfo *TII) {
614  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
615  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
616  unsigned NumRes = TID.getNumDefs();
617  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
618    if (Reg == *ImpDef)
619      break;
620    ++NumRes;
621  }
622  return N->getValueType(NumRes);
623}
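
// A rough illustration of the lookup above, assuming a hypothetical machine
// node with one explicit def and implicit defs {EFLAGS, EAX}: the node's
// value list is laid out as (def0, EFLAGS, EAX, ...), so asking for EAX
// starts NumRes at 1, skips EFLAGS, and returns N->getValueType(2).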
624
625/// CheckForLiveRegDef - Return true and update live register vector if the
626/// specified register def of the specified SUnit clobbers any "live" registers.
627static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
628                               std::vector<SUnit*> &LiveRegDefs,
629                               SmallSet<unsigned, 4> &RegAdded,
630                               SmallVector<unsigned, 4> &LRegs,
631                               const TargetRegisterInfo *TRI) {
632  bool Added = false;
633  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
634    if (RegAdded.insert(Reg)) {
635      LRegs.push_back(Reg);
636      Added = true;
637    }
638  }
639  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
640    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
641      if (RegAdded.insert(*Alias)) {
642        LRegs.push_back(*Alias);
643        Added = true;
644      }
645    }
646  return Added;
647}
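
// For example (illustrative, using hypothetical x86 registers): if some other
// SUnit currently owns LiveRegDefs[EAX] and SU defines AX, the alias walk
// above finds EAX among AX's aliases and records it in LRegs, so the caller
// knows SU must be delayed rather than clobber the live EAX def.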
648
649/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
650/// scheduling of the given node to satisfy live physical register dependencies.
651/// If the specified node is the last one that's available to schedule, do
652/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
653bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
654                                                 SmallVector<unsigned, 4> &LRegs){
655  if (NumLiveRegs == 0)
656    return false;
657
658  SmallSet<unsigned, 4> RegAdded;
659  // If this node would clobber any "live" register, then it's not ready.
660  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
661       I != E; ++I) {
662    if (I->isAssignedRegDep())
663      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
664                         RegAdded, LRegs, TRI);
665  }
666
667  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
668    if (Node->getOpcode() == ISD::INLINEASM) {
669      // Inline asm can clobber physical defs.
670      unsigned NumOps = Node->getNumOperands();
671      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
672        --NumOps;  // Ignore the flag operand.
673
674      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
675        unsigned Flags =
676          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
677        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
678
679        ++i; // Skip the ID value.
680        if (InlineAsm::isRegDefKind(Flags) ||
681            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
682          // Check for def of register or earlyclobber register.
683          for (; NumVals; --NumVals, ++i) {
684            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
685            if (TargetRegisterInfo::isPhysicalRegister(Reg))
686              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
687          }
688        } else
689          i += NumVals;
690      }
691      continue;
692    }
693
694    if (!Node->isMachineOpcode())
695      continue;
696    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
697    if (!TID.ImplicitDefs)
698      continue;
699    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
700      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
701  }
702  return !LRegs.empty();
703}
704
705
706/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
707/// schedulers.
708void ScheduleDAGRRList::ListScheduleBottomUp() {
709  unsigned CurCycle = 0;
710
711  // Release any predecessors of the special Exit node.
712  ReleasePredecessors(&ExitSU, CurCycle);
713
714  // Add root to Available queue.
715  if (!SUnits.empty()) {
716    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
717    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
718    RootSU->isAvailable = true;
719    AvailableQueue->push(RootSU);
720  }
721
722  // While Available queue is not empty, grab the node with the highest
723  // priority. If it is not ready put it back.  Schedule the node.
724  SmallVector<SUnit*, 4> NotReady;
725  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
726  Sequence.reserve(SUnits.size());
727  while (!AvailableQueue->empty()) {
728    bool Delayed = false;
729    LRegsMap.clear();
730    SUnit *CurSU = AvailableQueue->pop();
731    while (CurSU) {
732      SmallVector<unsigned, 4> LRegs;
733      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
734        break;
735      Delayed = true;
736      LRegsMap.insert(std::make_pair(CurSU, LRegs));
737
738      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
739      NotReady.push_back(CurSU);
740      CurSU = AvailableQueue->pop();
741    }
742
743    // All candidates are delayed due to live physical reg dependencies.
744    // Try backtracking, code duplication, or inserting cross class copies
745    // to resolve it.
746    if (Delayed && !CurSU) {
747      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
748        SUnit *TrySU = NotReady[i];
749        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
750
751        // Try unscheduling up to the point where it's safe to schedule
752        // this node.
753        unsigned LiveCycle = CurCycle;
754        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
755          unsigned Reg = LRegs[j];
756          unsigned LCycle = LiveRegCycles[Reg];
757          LiveCycle = std::min(LiveCycle, LCycle);
758        }
759        SUnit *OldSU = Sequence[LiveCycle];
760        if (!WillCreateCycle(TrySU, OldSU))  {
761          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
762          // Force the current node to be scheduled before the node that
763          // requires the physical reg dep.
764          if (OldSU->isAvailable) {
765            OldSU->isAvailable = false;
766            AvailableQueue->remove(OldSU);
767          }
768          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
769                              /*Reg=*/0, /*isNormalMemory=*/false,
770                              /*isMustAlias=*/false, /*isArtificial=*/true));
771          // If one or more successors have been unscheduled, then the current
772          // node is no longer available. Schedule a successor that's now
773          // available instead.
774          if (!TrySU->isAvailable)
775            CurSU = AvailableQueue->pop();
776          else {
777            CurSU = TrySU;
778            TrySU->isPending = false;
779            NotReady.erase(NotReady.begin()+i);
780          }
781          break;
782        }
783      }
784
785      if (!CurSU) {
786        // Can't backtrack. If it's too expensive to copy the value, then try
787        // duplicating the nodes that produce these "too expensive to copy"
788        // values to break the dependency. In case even that doesn't work,
789        // insert cross class copies.
790        // If it's not too expensive, i.e. cost != -1, issue copies.
791        SUnit *TrySU = NotReady[0];
792        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
793        assert(LRegs.size() == 1 && "Can't handle this yet!");
794        unsigned Reg = LRegs[0];
795        SUnit *LRDef = LiveRegDefs[Reg];
796        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
797        const TargetRegisterClass *RC =
798          TRI->getPhysicalRegisterRegClass(Reg, VT);
799        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
800
801        // If the cross-copy register class is null, then it must be possible to
802        // copy the value directly. Do not try to duplicate the def.
803        SUnit *NewDef = 0;
804        if (DestRC)
805          NewDef = CopyAndMoveSuccessors(LRDef);
806        else
807          DestRC = RC;
808        if (!NewDef) {
809          // Issue copies, these can be expensive cross register class copies.
810          SmallVector<SUnit*, 2> Copies;
811          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
812          DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
813                       << " to SU #" << Copies.front()->NodeNum << "\n");
814          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
815                              /*Reg=*/0, /*isNormalMemory=*/false,
816                              /*isMustAlias=*/false,
817                              /*isArtificial=*/true));
818          NewDef = Copies.back();
819        }
820
821        DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
822                     << " to SU #" << TrySU->NodeNum << "\n");
823        LiveRegDefs[Reg] = NewDef;
824        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
825                             /*Reg=*/0, /*isNormalMemory=*/false,
826                             /*isMustAlias=*/false,
827                             /*isArtificial=*/true));
828        TrySU->isAvailable = false;
829        CurSU = NewDef;
830      }
831
832      assert(CurSU && "Unable to resolve live physical register dependencies!");
833    }
834
835    // Add the nodes that aren't ready back onto the available list.
836    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
837      NotReady[i]->isPending = false;
838      // May no longer be available due to backtracking.
839      if (NotReady[i]->isAvailable)
840        AvailableQueue->push(NotReady[i]);
841    }
842    NotReady.clear();
843
844    if (CurSU)
845      ScheduleNodeBottomUp(CurSU, CurCycle);
846    ++CurCycle;
847    AvailableQueue->setCurCycle(CurCycle);
848  }
849
850  // Reverse the order since we scheduled bottom-up.
851  std::reverse(Sequence.begin(), Sequence.end());
852
853#ifndef NDEBUG
854  VerifySchedule(isBottomUp);
855#endif
856}
857
858//===----------------------------------------------------------------------===//
859//  Top-Down Scheduling
860//===----------------------------------------------------------------------===//
861
862/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
863/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
864void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
865  SUnit *SuccSU = SuccEdge->getSUnit();
866
867#ifndef NDEBUG
868  if (SuccSU->NumPredsLeft == 0) {
869    dbgs() << "*** Scheduling failed! ***\n";
870    SuccSU->dump(this);
871    dbgs() << " has been released too many times!\n";
872    llvm_unreachable(0);
873  }
874#endif
875  --SuccSU->NumPredsLeft;
876
877  // If all the node's predecessors are scheduled, this node is ready
878  // to be scheduled. Ignore the special ExitSU node.
879  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
880    SuccSU->isAvailable = true;
881    AvailableQueue->push(SuccSU);
882  }
883}
884
885void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
886  // Top down: release successors
887  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
888       I != E; ++I) {
889    assert(!I->isAssignedRegDep() &&
890           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
891
892    ReleaseSucc(SU, &*I);
893  }
894}
895
896/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
897/// count of its successors. If a successor's pending count reaches zero, add it to
898/// the Available queue.
899void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
900  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
901  DEBUG(SU->dump(this));
902
903  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
904  SU->setDepthToAtLeast(CurCycle);
905  Sequence.push_back(SU);
906
907  ReleaseSuccessors(SU);
908  SU->isScheduled = true;
909  AvailableQueue->ScheduledNode(SU);
910}
911
912/// ListScheduleTopDown - The main loop of list scheduling for top-down
913/// schedulers.
914void ScheduleDAGRRList::ListScheduleTopDown() {
915  unsigned CurCycle = 0;
916  AvailableQueue->setCurCycle(CurCycle);
917
918  // Release any successors of the special Entry node.
919  ReleaseSuccessors(&EntrySU);
920
921  // Add all leaves to the Available queue.
922  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
923    // It is available if it has no predecessors.
924    if (SUnits[i].Preds.empty()) {
925      AvailableQueue->push(&SUnits[i]);
926      SUnits[i].isAvailable = true;
927    }
928  }
929
930  // While Available queue is not empty, grab the node with the highest
931  // priority. If it is not ready put it back.  Schedule the node.
932  Sequence.reserve(SUnits.size());
933  while (!AvailableQueue->empty()) {
934    SUnit *CurSU = AvailableQueue->pop();
935
936    if (CurSU)
937      ScheduleNodeTopDown(CurSU, CurCycle);
938    ++CurCycle;
939    AvailableQueue->setCurCycle(CurCycle);
940  }
941
942#ifndef NDEBUG
943  VerifySchedule(isBottomUp);
944#endif
945}
946
947
948//===----------------------------------------------------------------------===//
949//                RegReductionPriorityQueue Implementation
950//===----------------------------------------------------------------------===//
951//
952// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
953// to reduce register pressure.
954//
955namespace {
956  template<class SF>
957  class RegReductionPriorityQueue;
958
959  /// Sorting functions for the Available queue.
960  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
961    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
962    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
963    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
964
965    bool operator()(const SUnit* left, const SUnit* right) const;
966  };
967
968  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
969    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
970    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
971    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
972
973    bool operator()(const SUnit* left, const SUnit* right) const;
974  };
975
976  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
977    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
978    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
979      : SPQ(spq) {}
980    src_ls_rr_sort(const src_ls_rr_sort &RHS)
981      : SPQ(RHS.SPQ) {}
982
983    bool operator()(const SUnit* left, const SUnit* right) const;
984  };
985
986  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
987    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
988    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
989      : SPQ(spq) {}
990    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
991      : SPQ(RHS.SPQ) {}
992
993    bool operator()(const SUnit* left, const SUnit* right) const;
994  };
995}  // end anonymous namespace
996
997/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
998/// Smaller number is the higher priority.
999static unsigned
1000CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1001  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1002  if (SethiUllmanNumber != 0)
1003    return SethiUllmanNumber;
1004
1005  unsigned Extra = 0;
1006  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1007       I != E; ++I) {
1008    if (I->isCtrl()) continue;  // ignore chain preds
1009    SUnit *PredSU = I->getSUnit();
1010    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1011    if (PredSethiUllman > SethiUllmanNumber) {
1012      SethiUllmanNumber = PredSethiUllman;
1013      Extra = 0;
1014    } else if (PredSethiUllman == SethiUllmanNumber)
1015      ++Extra;
1016  }
1017
1018  SethiUllmanNumber += Extra;
1019
1020  if (SethiUllmanNumber == 0)
1021    SethiUllmanNumber = 1;
1022
1023  return SethiUllmanNumber;
1024}
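
// A small worked example of the recurrence above (illustrative only):
//
//   a = leaf            SethiUllman(a) = 1   (no data preds, defaults to 1)
//   b = leaf            SethiUllman(b) = 1
//   c = op a, b         max over preds is 1 with one extra tie, so 1 + 1 = 2
//   d = op c, a         max over preds is 2 with no tie at the max, so 2
//
// i.e. a node's number is the maximum over its data predecessors plus one for
// each additional predecessor that ties at that maximum.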
1025
1026namespace {
1027  template<class SF>
1028  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
1029    std::vector<SUnit*> Queue;
1030    SF Picker;
1031    unsigned CurQueueId;
1032
1033  protected:
1034    // SUnits - The SUnits for the current graph.
1035    std::vector<SUnit> *SUnits;
1036
1037    const TargetInstrInfo *TII;
1038    const TargetRegisterInfo *TRI;
1039    ScheduleDAGRRList *scheduleDAG;
1040
1041    // SethiUllmanNumbers - The SethiUllman number for each node.
1042    std::vector<unsigned> SethiUllmanNumbers;
1043
1044  public:
1045    RegReductionPriorityQueue(const TargetInstrInfo *tii,
1046                              const TargetRegisterInfo *tri)
1047      : Picker(this), CurQueueId(0),
1048        TII(tii), TRI(tri), scheduleDAG(NULL) {}
1049
1050    void initNodes(std::vector<SUnit> &sunits) {
1051      SUnits = &sunits;
1052      // Add pseudo dependency edges for two-address nodes.
1053      AddPseudoTwoAddrDeps();
1054      // Reroute edges to nodes with multiple uses.
1055      PrescheduleNodesWithMultipleUses();
1056      // Calculate node priorities.
1057      CalculateSethiUllmanNumbers();
1058    }
1059
1060    void addNode(const SUnit *SU) {
1061      unsigned SUSize = SethiUllmanNumbers.size();
1062      if (SUnits->size() > SUSize)
1063        SethiUllmanNumbers.resize(SUSize*2, 0);
1064      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1065    }
1066
1067    void updateNode(const SUnit *SU) {
1068      SethiUllmanNumbers[SU->NodeNum] = 0;
1069      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1070    }
1071
1072    void releaseState() {
1073      SUnits = 0;
1074      SethiUllmanNumbers.clear();
1075    }
1076
1077    unsigned getNodePriority(const SUnit *SU) const {
1078      assert(SU->NodeNum < SethiUllmanNumbers.size());
1079      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1080      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1081        // CopyToReg should be close to its uses to facilitate coalescing and
1082        // avoid spilling.
1083        return 0;
1084      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1085          Opc == TargetOpcode::SUBREG_TO_REG ||
1086          Opc == TargetOpcode::INSERT_SUBREG)
1087        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1088        // close to their uses to facilitate coalescing.
1089        return 0;
1090      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1091        // If SU does not have a register use, i.e. it doesn't produce a value
1092        // that would be consumed (e.g. store), then it terminates a chain of
1093        // computation.  Give it a large SethiUllman number so it will be
1094        // scheduled right before its predecessors so that it doesn't lengthen
1095        // their live ranges.
1096        return 0xffff;
1097      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1098        // If SU does not have a register def, schedule it close to its uses
1099        // because it does not lengthen any live ranges.
1100        return 0;
1101      return SethiUllmanNumbers[SU->NodeNum];
1102    }
1103
1104    unsigned getNodeOrdering(const SUnit *SU) const {
1105      return scheduleDAG->DAG->GetOrdering(SU->getNode());
1106    }
1107
1108    bool empty() const { return Queue.empty(); }
1109
1110    void push(SUnit *U) {
1111      assert(!U->NodeQueueId && "Node in the queue already");
1112      U->NodeQueueId = ++CurQueueId;
1113      Queue.push_back(U);
1114    }
1115
1116    SUnit *pop() {
1117      if (empty()) return NULL;
1118      std::vector<SUnit *>::iterator Best = Queue.begin();
1119      for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
1120           E = Queue.end(); I != E; ++I)
1121        if (Picker(*Best, *I))
1122          Best = I;
1123      SUnit *V = *Best;
1124      if (Best != prior(Queue.end()))
1125        std::swap(*Best, Queue.back());
1126      Queue.pop_back();
1127      V->NodeQueueId = 0;
1128      return V;
1129    }
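
    // Note: pop() above is a linear scan; Picker(A, B) returning true means B
    // is preferred over A, so the loop keeps the most-preferred node in Best.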
1130
1131    void remove(SUnit *SU) {
1132      assert(!Queue.empty() && "Queue is empty!");
1133      assert(SU->NodeQueueId != 0 && "Not in queue!");
1134      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1135                                                   SU);
1136      if (I != prior(Queue.end()))
1137        std::swap(*I, Queue.back());
1138      Queue.pop_back();
1139      SU->NodeQueueId = 0;
1140    }
1141
1142    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1143      scheduleDAG = scheduleDag;
1144    }
1145
1146  protected:
1147    bool canClobber(const SUnit *SU, const SUnit *Op);
1148    void AddPseudoTwoAddrDeps();
1149    void PrescheduleNodesWithMultipleUses();
1150    void CalculateSethiUllmanNumbers();
1151  };
1152
1153  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1154    BURegReductionPriorityQueue;
1155
1156  typedef RegReductionPriorityQueue<td_ls_rr_sort>
1157    TDRegReductionPriorityQueue;
1158
1159  typedef RegReductionPriorityQueue<src_ls_rr_sort>
1160    SrcRegReductionPriorityQueue;
1161
1162  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1163    HybridBURRPriorityQueue;
1164}
1165
1166/// closestSucc - Returns the scheduled cycle of the successor which is
1167/// closest to the current cycle.
1168static unsigned closestSucc(const SUnit *SU) {
1169  unsigned MaxHeight = 0;
1170  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1171       I != E; ++I) {
1172    if (I->isCtrl()) continue;  // ignore chain succs
1173    unsigned Height = I->getSUnit()->getHeight();
1174    // If there are bunch of CopyToRegs stacked up, they should be considered
1175    // to be at the same position.
1176    if (I->getSUnit()->getNode() &&
1177        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1178      Height = closestSucc(I->getSUnit())+1;
1179    if (Height > MaxHeight)
1180      MaxHeight = Height;
1181  }
1182  return MaxHeight;
1183}
1184
1185/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
1186/// for scratch registers, i.e. the number of data dependencies.
1187static unsigned calcMaxScratches(const SUnit *SU) {
1188  unsigned Scratches = 0;
1189  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1190       I != E; ++I) {
1191    if (I->isCtrl()) continue;  // ignore chain preds
1192    Scratches++;
1193  }
1194  return Scratches;
1195}
1196
1197template <typename RRSort>
1198static bool BURRSort(const SUnit *left, const SUnit *right,
1199                     const RegReductionPriorityQueue<RRSort> *SPQ) {
1200  unsigned LPriority = SPQ->getNodePriority(left);
1201  unsigned RPriority = SPQ->getNodePriority(right);
1202  if (LPriority != RPriority)
1203    return LPriority > RPriority;
1204
1205  // Try to schedule def and use closer together when Sethi-Ullman numbers are equal.
1206  // e.g.
1207  // t1 = op t2, c1
1208  // t3 = op t4, c2
1209  //
1210  // and the following instructions are both ready.
1211  // t2 = op c3
1212  // t4 = op c4
1213  //
1214  // Then schedule t2 = op first.
1215  // i.e.
1216  // t4 = op c4
1217  // t2 = op c3
1218  // t1 = op t2, c1
1219  // t3 = op t4, c2
1220  //
1221  // This creates more short live intervals.
1222  unsigned LDist = closestSucc(left);
1223  unsigned RDist = closestSucc(right);
1224  if (LDist != RDist)
1225    return LDist < RDist;
1226
1227  // How many registers become live when the node is scheduled.
1228  unsigned LScratch = calcMaxScratches(left);
1229  unsigned RScratch = calcMaxScratches(right);
1230  if (LScratch != RScratch)
1231    return LScratch > RScratch;
1232
1233  if (left->getHeight() != right->getHeight())
1234    return left->getHeight() > right->getHeight();
1235
1236  if (left->getDepth() != right->getDepth())
1237    return left->getDepth() < right->getDepth();
1238
1239  assert(left->NodeQueueId && right->NodeQueueId &&
1240         "NodeQueueId cannot be zero");
1241  return (left->NodeQueueId > right->NodeQueueId);
1242}
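
// Recap of the tie-breaking order implemented above: Sethi-Ullman priority
// first, then distance to the closest successor, then the number of data
// predecessors that become live ("scratches"), then height, then depth, and
// finally the queue id as a last-resort stable tie-break.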
1243
1244// Bottom up
1245bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1246  return BURRSort(left, right, SPQ);
1247}
1248
1249// Source order, otherwise bottom up.
1250bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1251  unsigned LOrder = SPQ->getNodeOrdering(left);
1252  unsigned ROrder = SPQ->getNodeOrdering(right);
1253
1254  // Prefer an ordering where the lower the non-zero order number, the higher
1255  // the preference.
1256  if ((LOrder || ROrder) && LOrder != ROrder)
1257    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
1258
1259  return BURRSort(left, right, SPQ);
1260}
1261
1262bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
1263  bool LStall = left->SchedulingPref == Sched::Latency &&
1264    SPQ->getCurCycle() < left->getHeight();
1265  bool RStall = right->SchedulingPref == Sched::Latency &&
1266    SPQ->getCurCycle() < right->getHeight();
1267  // If scheduling one of the nodes will cause a pipeline stall, delay it.
1268  // If scheduling both of them will cause pipeline stalls, sort them
1269  // according to their height.
1270  // If neither will cause a pipeline stall, try to reduce register pressure.
1271  if (LStall) {
1272    if (!RStall)
1273      return true;
1274    if (left->getHeight() != right->getHeight())
1275      return left->getHeight() > right->getHeight();
1276  } else if (RStall)
1277      return false;
1278
1279  // If either node is scheduling for latency, sort them by height and latency
1280  // first.
1281  if (left->SchedulingPref == Sched::Latency ||
1282      right->SchedulingPref == Sched::Latency) {
1283    if (left->getHeight() != right->getHeight())
1284      return left->getHeight() > right->getHeight();
1285    if (left->Latency != right->Latency)
1286      return left->Latency > right->Latency;
1287  }
1288
1289  return BURRSort(left, right, SPQ);
1290}
1291
1292template<class SF>
1293bool
1294RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1295  if (SU->isTwoAddress) {
1296    unsigned Opc = SU->getNode()->getMachineOpcode();
1297    const TargetInstrDesc &TID = TII->get(Opc);
1298    unsigned NumRes = TID.getNumDefs();
1299    unsigned NumOps = TID.getNumOperands() - NumRes;
1300    for (unsigned i = 0; i != NumOps; ++i) {
1301      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1302        SDNode *DU = SU->getNode()->getOperand(i).getNode();
1303        if (DU->getNodeId() != -1 &&
1304            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1305          return true;
1306      }
1307    }
1308  }
1309  return false;
1310}
1311
1312/// hasCopyToRegUse - Return true if SU has a value successor that is a
1313/// CopyToReg node.
1314static bool hasCopyToRegUse(const SUnit *SU) {
1315  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1316       I != E; ++I) {
1317    if (I->isCtrl()) continue;
1318    const SUnit *SuccSU = I->getSUnit();
1319    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
1320      return true;
1321  }
1322  return false;
1323}
1324
1325/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1326/// physical register defs.
1327static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1328                                  const TargetInstrInfo *TII,
1329                                  const TargetRegisterInfo *TRI) {
1330  SDNode *N = SuccSU->getNode();
1331  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1332  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1333  assert(ImpDefs && "Caller should check hasPhysRegDefs");
1334  for (const SDNode *SUNode = SU->getNode(); SUNode;
1335       SUNode = SUNode->getFlaggedNode()) {
1336    if (!SUNode->isMachineOpcode())
1337      continue;
1338    const unsigned *SUImpDefs =
1339      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
1340    if (!SUImpDefs)
1341      return false;
1342    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1343      EVT VT = N->getValueType(i);
1344      if (VT == MVT::Flag || VT == MVT::Other)
1345        continue;
1346      if (!N->hasAnyUseOfValue(i))
1347        continue;
1348      unsigned Reg = ImpDefs[i - NumDefs];
1349      for (;*SUImpDefs; ++SUImpDefs) {
1350        unsigned SUReg = *SUImpDefs;
1351        if (TRI->regsOverlap(Reg, SUReg))
1352          return true;
1353      }
1354    }
1355  }
1356  return false;
1357}
1358
1359/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1360/// are not handled well by the general register pressure reduction
1361/// heuristics. When presented with code like this:
1362///
1363///      N
1364///    / |
1365///   /  |
1366///  U  store
1367///  |
1368/// ...
1369///
1370/// the heuristics tend to push the store up, but since the
1371/// operand of the store has another use (U), this would increase
1372/// the length of that other use (the U->N edge).
1373///
1374/// This function transforms code like the above to route U's
1375/// dependence through the store when possible, like this:
1376///
1377///      N
1378///      ||
1379///      ||
1380///     store
1381///       |
1382///       U
1383///       |
1384///      ...
1385///
1386/// This results in the store being scheduled immediately
1387/// after N, which shortens the U->N live range, reducing
1388/// register pressure.
1389///
1390template<class SF>
1391void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1392  // Visit all the nodes in topological order, working top-down.
1393  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1394    SUnit *SU = &(*SUnits)[i];
1395    // For now, only look at nodes with no data successors, such as stores.
1396    // These are especially important, due to the heuristics in
1397    // getNodePriority for nodes with no data successors.
1398    if (SU->NumSuccs != 0)
1399      continue;
1400    // For now, only look at nodes with exactly one data predecessor.
1401    if (SU->NumPreds != 1)
1402      continue;
1403    // Avoid prescheduling copies to virtual registers, which don't behave
1404    // like other nodes from the perspective of scheduling heuristics.
1405    if (SDNode *N = SU->getNode())
1406      if (N->getOpcode() == ISD::CopyToReg &&
1407          TargetRegisterInfo::isVirtualRegister
1408            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1409        continue;
1410
1411    // Locate the single data predecessor.
1412    SUnit *PredSU = 0;
1413    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1414         EE = SU->Preds.end(); II != EE; ++II)
1415      if (!II->isCtrl()) {
1416        PredSU = II->getSUnit();
1417        break;
1418      }
1419    assert(PredSU);
1420
1421    // Don't rewrite edges that carry physregs, because that requires additional
1422    // support infrastructure.
1423    if (PredSU->hasPhysRegDefs)
1424      continue;
1425    // Short-circuit the case where SU is PredSU's only data successor.
1426    if (PredSU->NumSuccs == 1)
1427      continue;
1428    // Avoid prescheduling copies from virtual registers, which don't behave
1429    // like other nodes from the perspective of scheduling heuristics.
1430    if (SDNode *N = SU->getNode())
1431      if (N->getOpcode() == ISD::CopyFromReg &&
1432          TargetRegisterInfo::isVirtualRegister
1433            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1434        continue;
1435
1436    // Perform checks on the successors of PredSU.
1437    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1438         EE = PredSU->Succs.end(); II != EE; ++II) {
1439      SUnit *PredSuccSU = II->getSUnit();
1440      if (PredSuccSU == SU) continue;
1441      // If PredSU has another successor with no data successors, for
1442      // now don't attempt to choose either over the other.
1443      if (PredSuccSU->NumSuccs == 0)
1444        goto outer_loop_continue;
1445      // Don't break physical register dependencies.
1446      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1447        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1448          goto outer_loop_continue;
1449      // Don't introduce graph cycles.
1450      if (scheduleDAG->IsReachable(SU, PredSuccSU))
1451        goto outer_loop_continue;
1452    }
1453
1454    // Ok, the transformation is safe and the heuristics suggest it is
1455    // profitable. Update the graph.
1456    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
1457                 << " next to PredSU #" << PredSU->NodeNum
1458                 << " to guide scheduling in the presence of multiple uses\n");
1459    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1460      SDep Edge = PredSU->Succs[i];
1461      assert(!Edge.isAssignedRegDep());
1462      SUnit *SuccSU = Edge.getSUnit();
1463      if (SuccSU != SU) {
1464        Edge.setSUnit(PredSU);
1465        scheduleDAG->RemovePred(SuccSU, Edge);
1466        scheduleDAG->AddPred(SU, Edge);
1467        Edge.setSUnit(SU);
1468        scheduleDAG->AddPred(SuccSU, Edge);
1469        --i;
1470      }
1471    }
1472  outer_loop_continue:;
1473  }
1474}
1475
1476/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
1477/// it as a def&use operand, add a pseudo control edge from it to the other
1478/// node (if it won't create a cycle) so the two-address one will be scheduled
1479/// first (lower in the schedule). If both nodes are two-address, favor the
1480/// one that has a CopyToReg use (more likely to be a loop induction update).
1481/// If both are two-address, but one is commutable while the other is not
1482/// commutable, favor the one that's not commutable.
1483template<class SF>
1484void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1485  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1486    SUnit *SU = &(*SUnits)[i];
1487    if (!SU->isTwoAddress)
1488      continue;
1489
1490    SDNode *Node = SU->getNode();
1491    if (!Node || !Node->isMachineOpcode() || Node->getFlaggedNode())
1492      continue;
1493
1494    unsigned Opc = Node->getMachineOpcode();
1495    const TargetInstrDesc &TID = TII->get(Opc);
1496    unsigned NumRes = TID.getNumDefs();
1497    unsigned NumOps = TID.getNumOperands() - NumRes;
1498    for (unsigned j = 0; j != NumOps; ++j) {
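          // Only operands tied to a def (two-address operands) are of interest.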
1499      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1500        continue;
1501      SDNode *DU = SU->getNode()->getOperand(j).getNode();
1502      SDNode *DU = Node->getOperand(j).getNode();
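          // A node id of -1 means the operand has no corresponding SUnit; skip it.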
1503        continue;
1504      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1505      if (!DUSU) continue;
1506      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1507           E = DUSU->Succs.end(); I != E; ++I) {
1508        if (I->isCtrl()) continue;
1509        SUnit *SuccSU = I->getSUnit();
1510        if (SuccSU == SU)
1511          continue;
1512        // Be conservative. Ignore if nodes aren't at roughly the same
1513        // depth and height.
1514        if (SuccSU->getHeight() < SU->getHeight() &&
1515            (SU->getHeight() - SuccSU->getHeight()) > 1)
1516          continue;
1517        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1518        // constrains whatever is using the copy, instead of the copy
1519        // itself. If the copy is coalesced, this preserves the intent of
1520        // the pseudo two-address heuristics.
1521        while (SuccSU->Succs.size() == 1 &&
1522               SuccSU->getNode()->isMachineOpcode() &&
1523               SuccSU->getNode()->getMachineOpcode() ==
1524                 TargetOpcode::COPY_TO_REGCLASS)
1525          SuccSU = SuccSU->Succs.front().getSUnit();
1526        // Don't constrain non-instruction nodes.
1527        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1528          continue;
1529        // Don't constrain nodes with physical register defs if the
1530        // predecessor can clobber them.
1531        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1532          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1533            continue;
1534        }
1535        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1536        // these may be coalesced away. We want them close to their uses.
1537        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1538        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
1539            SuccOpc == TargetOpcode::INSERT_SUBREG ||
1540            SuccOpc == TargetOpcode::SUBREG_TO_REG)
1541          continue;
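            // Apply the tie-breaking heuristics from the function comment and
            // make sure the new artificial edge would not create a cycle.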
1542        if ((!canClobber(SuccSU, DUSU) ||
1543             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
1544             (!SU->isCommutable && SuccSU->isCommutable)) &&
1545            !scheduleDAG->IsReachable(SuccSU, SU)) {
1546          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
1547                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
1548          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1549                                        /*Reg=*/0, /*isNormalMemory=*/false,
1550                                        /*isMustAlias=*/false,
1551                                        /*isArtificial=*/true));
1552        }
1553      }
1554    }
1555  }
1556}
1557
1558/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1559/// scheduling units.
1560template<class SF>
1561void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1562  SethiUllmanNumbers.assign(SUnits->size(), 0);
1563
1564  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1565    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1566}
1567
1568/// LimitedSumOfUnscheduledPredsOfSuccs - Count the unscheduled predecessors
1569/// of the successors of the SUnit SU, stopping once the count exceeds the
1570/// provided limit.
1571static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1572                                                    unsigned Limit) {
1573  unsigned Sum = 0;
1574  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1575       I != E; ++I) {
1576    const SUnit *SuccSU = I->getSUnit();
1577    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1578         EE = SuccSU->Preds.end(); II != EE; ++II) {
1579      SUnit *PredSU = II->getSUnit();
1580      if (!PredSU->isScheduled)
1581        if (++Sum > Limit)
1582          return Sum;
1583    }
1584  }
1585  return Sum;
1586}
1587
1588
1589// Top-down priority comparison for the top-down register reduction scheduler.
1590bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1591  unsigned LPriority = SPQ->getNodePriority(left);
1592  unsigned RPriority = SPQ->getNodePriority(right);
1593  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1594  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
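      // A 'floater' is a machine node with no predecessors; nothing forces it
      // into a particular slot in the schedule.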
1595  bool LIsFloater = LIsTarget && left->NumPreds == 0;
1596  bool RIsFloater = RIsTarget && right->NumPreds == 0;
1597  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1598  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1599
1600  if (left->NumSuccs == 0 && right->NumSuccs != 0)
1601    return false;
1602  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1603    return true;
1604
1605  if (LIsFloater)
1606    LBonus -= 2;
1607  if (RIsFloater)
1608    RBonus -= 2;
1609  if (left->NumSuccs == 1)
1610    LBonus += 2;
1611  if (right->NumSuccs == 1)
1612    RBonus += 2;
1613
1614  if (LPriority+LBonus != RPriority+RBonus)
1615    return LPriority+LBonus < RPriority+RBonus;
1616
1617  if (left->getDepth() != right->getDepth())
1618    return left->getDepth() < right->getDepth();
1619
1620  if (left->NumSuccsLeft != right->NumSuccsLeft)
1621    return left->NumSuccsLeft > right->NumSuccsLeft;
1622
1623  assert(left->NodeQueueId && right->NodeQueueId &&
1624         "NodeQueueId cannot be zero");
1625  return (left->NodeQueueId > right->NodeQueueId);
1626}
1627
1628//===----------------------------------------------------------------------===//
1629//                         Public Constructor Functions
1630//===----------------------------------------------------------------------===//
1631
1632llvm::ScheduleDAGSDNodes *
1633llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1634  const TargetMachine &TM = IS->TM;
1635  const TargetInstrInfo *TII = TM.getInstrInfo();
1636  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1637
1638  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
1639
1640  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, /*isBottomUp=*/true, /*NeedLatency=*/false, PQ);
1641  PQ->setScheduleDAG(SD);
1642  return SD;
1643}
1644
1645llvm::ScheduleDAGSDNodes *
1646llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1647  const TargetMachine &TM = IS->TM;
1648  const TargetInstrInfo *TII = TM.getInstrInfo();
1649  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1650
1651  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
1652
1653  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, /*isBottomUp=*/false, /*NeedLatency=*/false, PQ);
1654  PQ->setScheduleDAG(SD);
1655  return SD;
1656}
1657
1658llvm::ScheduleDAGSDNodes *
1659llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1660  const TargetMachine &TM = IS->TM;
1661  const TargetInstrInfo *TII = TM.getInstrInfo();
1662  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1663
1664  SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
1665
1666  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, /*isBottomUp=*/true, /*NeedLatency=*/false, PQ);
1667  PQ->setScheduleDAG(SD);
1668  return SD;
1669}
1670
1671llvm::ScheduleDAGSDNodes *
1672llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1673  const TargetMachine &TM = IS->TM;
1674  const TargetInstrInfo *TII = TM.getInstrInfo();
1675  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1676
1677  HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);
1678
1679  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, /*isBottomUp=*/true, /*NeedLatency=*/true, PQ);
1680  PQ->setScheduleDAG(SD);
1681  return SD;
1682}
1683