ScheduleDAGRRList.cpp revision e264f62ca09a8f65c87a46d562a4d0f9ec5d457e
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/CodeGen/SchedulerRegistry.h"
21#include "llvm/CodeGen/SelectionDAGISel.h"
22#include "llvm/Target/TargetRegisterInfo.h"
23#include "llvm/Target/TargetData.h"
24#include "llvm/Target/TargetMachine.h"
25#include "llvm/Target/TargetInstrInfo.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/ErrorHandling.h"
28#include "llvm/ADT/PriorityQueue.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/raw_ostream.h"
33#include <climits>
34using namespace llvm;
35
STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

// Register the three scheduler variants with the scheduler registry so they
// can be selected by name through SchedulerRegistry.
static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);
54
55namespace {
56//===----------------------------------------------------------------------===//
57/// ScheduleDAGRRList - The actual register reduction list scheduler
58/// implementation.  This supports both top-down and bottom-up scheduling.
59///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  /// Owned by this scheduler and deleted in the destructor.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definition
  /// that are "live". These nodes must be scheduled before any other nodes that
  /// modifies the registers can be scheduled.
  /// LiveRegDefs/LiveRegCycles are indexed by register number; NumLiveRegs
  /// counts the non-NULL entries in LiveRegDefs.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  /// Constructor. Takes ownership of availqueue.
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
      AvailableQueue(availqueue), Topo(SUnits) {
    }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  /// Schedule - Run the scheduler (bottom-up or top-down per isBottomUp).
  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  /// (Note: returns nothing, despite SUnit::addPred reporting newness.)
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();


  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Return true, since register-pressure-reducing
  /// scheduling doesn't need actual latency information.
  bool ForceUnitLatencies() const { return true; }
};
167}  // end anonymous namespace
168
169
170/// Schedule - Schedule the DAG using list scheduling.
171void ScheduleDAGRRList::Schedule() {
172  DEBUG(dbgs() << "********** List Scheduling **********\n");
173
174  NumLiveRegs = 0;
175  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
176  LiveRegCycles.resize(TRI->getNumRegs(), 0);
177
178  // Build the scheduling graph.
179  BuildSchedGraph(NULL);
180
181  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
182          SUnits[su].dumpAll(this));
183  Topo.InitDAGTopologicalSorting();
184
185  AvailableQueue->initNodes(SUnits);
186
187  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
188  if (isBottomUp)
189    ListScheduleBottomUp();
190  else
191    ListScheduleTopDown();
192
193  AvailableQueue->releaseState();
194}
195
196//===----------------------------------------------------------------------===//
197//  Bottom-Up Scheduling
198//===----------------------------------------------------------------------===//
199
200/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
201/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
202void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
203  SUnit *PredSU = PredEdge->getSUnit();
204
205#ifndef NDEBUG
206  if (PredSU->NumSuccsLeft == 0) {
207    dbgs() << "*** Scheduling failed! ***\n";
208    PredSU->dump(this);
209    dbgs() << " has been released too many times!\n";
210    llvm_unreachable(0);
211  }
212#endif
213  --PredSU->NumSuccsLeft;
214
215  // If all the node's successors are scheduled, this node is ready
216  // to be scheduled. Ignore the special EntrySU node.
217  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
218    PredSU->isAvailable = true;
219    AvailableQueue->push(PredSU);
220  }
221}
222
223void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
224  // Bottom up: release predecessors
225  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
226       I != E; ++I) {
227    ReleasePred(SU, &*I);
228    if (I->isAssignedRegDep()) {
229      // This is a physical register dependency and it's impossible or
230      // expensive to copy the register. Make sure nothing that can
231      // clobber the register is scheduled between the predecessor and
232      // this node.
233      if (!LiveRegDefs[I->getReg()]) {
234        ++NumLiveRegs;
235        LiveRegDefs[I->getReg()] = I->getSUnit();
236        LiveRegCycles[I->getReg()] = CurCycle;
237      }
238    }
239  }
240}
241
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  // Make predecessors available, recording any live physreg defs they feed.
  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      // NOTE(review): liveness is keyed on the recorded cycle matching the
      // successor's height — presumably equivalent under the unit latencies
      // this scheduler forces (ForceUnitLatencies); confirm before changing.
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}
273
274/// CapturePred - This does the opposite of ReleasePred. Since SU is being
275/// unscheduled, incrcease the succ left count of its predecessors. Remove
276/// them from AvailableQueue if necessary.
277void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
278  SUnit *PredSU = PredEdge->getSUnit();
279  if (PredSU->isAvailable) {
280    PredSU->isAvailable = false;
281    if (!PredSU->isPending)
282      AvailableQueue->remove(PredSU);
283  }
284
285  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
286  ++PredSU->NumSuccsLeft;
287}
288
/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  AvailableQueue->UnscheduledNode(SU);

  // Undo ReleasePredecessors: re-capture each predecessor and retire any
  // live physreg defs that were recorded when this node was scheduled.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegCycles[I->getReg()] = 0;
    }
  }

  // Re-establish this node's own physreg defs as live for its (still
  // scheduled) successors — the inverse of the release in
  // ScheduleNodeBottomUp.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()]) {
        LiveRegDefs[I->getReg()] = SU;
        ++NumLiveRegs;
      }
      // Track the earliest (lowest-height) consumer of the register.
      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}
327
/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BTCycle in order to schedule a specific node. Unschedules every node in
/// Sequence from CurCycle down to BtCycle, updating CurCycle in place.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
  }

  // NOTE(review): if CurCycle == BtCycle on entry, OldSU is still NULL here
  // and isSucc is handed a null pointer — presumably callers guarantee
  // CurCycle > BtCycle; confirm.
  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  ++NumBacktracks;
}
347
348static bool isOperandOf(const SUnit *SU, SDNode *N) {
349  for (const SDNode *SUNode = SU->getNode(); SUNode;
350       SUNode = SUNode->getFlaggedNode()) {
351    if (SUNode->isOperandOf(N))
352      return true;
353  }
354  return false;
355}
356
357/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
358/// successors to the newly created node.
359SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
360  if (SU->getNode()->getFlaggedNode())
361    return NULL;
362
363  SDNode *N = SU->getNode();
364  if (!N)
365    return NULL;
366
367  SUnit *NewSU;
368  bool TryUnfold = false;
369  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
370    EVT VT = N->getValueType(i);
371    if (VT == MVT::Flag)
372      return NULL;
373    else if (VT == MVT::Other)
374      TryUnfold = true;
375  }
376  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
377    const SDValue &Op = N->getOperand(i);
378    EVT VT = Op.getNode()->getValueType(Op.getResNo());
379    if (VT == MVT::Flag)
380      return NULL;
381  }
382
383  if (TryUnfold) {
384    SmallVector<SDNode*, 2> NewNodes;
385    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
386      return NULL;
387
388    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
389    assert(NewNodes.size() == 2 && "Expected a load folding node!");
390
391    N = NewNodes[1];
392    SDNode *LoadNode = NewNodes[0];
393    unsigned NumVals = N->getNumValues();
394    unsigned OldNumVals = SU->getNode()->getNumValues();
395    for (unsigned i = 0; i != NumVals; ++i)
396      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
397    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
398                                   SDValue(LoadNode, 1));
399
400    // LoadNode may already exist. This can happen when there is another
401    // load from the same location and producing the same type of value
402    // but it has different alignment or volatileness.
403    bool isNewLoad = true;
404    SUnit *LoadSU;
405    if (LoadNode->getNodeId() != -1) {
406      LoadSU = &SUnits[LoadNode->getNodeId()];
407      isNewLoad = false;
408    } else {
409      LoadSU = CreateNewSUnit(LoadNode);
410      LoadNode->setNodeId(LoadSU->NodeNum);
411      ComputeLatency(LoadSU);
412    }
413
414    SUnit *NewSU = CreateNewSUnit(N);
415    assert(N->getNodeId() == -1 && "Node already inserted!");
416    N->setNodeId(NewSU->NodeNum);
417
418    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
419    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
420      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
421        NewSU->isTwoAddress = true;
422        break;
423      }
424    }
425    if (TID.isCommutable())
426      NewSU->isCommutable = true;
427    ComputeLatency(NewSU);
428
429    // Record all the edges to and from the old SU, by category.
430    SmallVector<SDep, 4> ChainPreds;
431    SmallVector<SDep, 4> ChainSuccs;
432    SmallVector<SDep, 4> LoadPreds;
433    SmallVector<SDep, 4> NodePreds;
434    SmallVector<SDep, 4> NodeSuccs;
435    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
436         I != E; ++I) {
437      if (I->isCtrl())
438        ChainPreds.push_back(*I);
439      else if (isOperandOf(I->getSUnit(), LoadNode))
440        LoadPreds.push_back(*I);
441      else
442        NodePreds.push_back(*I);
443    }
444    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
445         I != E; ++I) {
446      if (I->isCtrl())
447        ChainSuccs.push_back(*I);
448      else
449        NodeSuccs.push_back(*I);
450    }
451
452    // Now assign edges to the newly-created nodes.
453    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
454      const SDep &Pred = ChainPreds[i];
455      RemovePred(SU, Pred);
456      if (isNewLoad)
457        AddPred(LoadSU, Pred);
458    }
459    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
460      const SDep &Pred = LoadPreds[i];
461      RemovePred(SU, Pred);
462      if (isNewLoad)
463        AddPred(LoadSU, Pred);
464    }
465    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
466      const SDep &Pred = NodePreds[i];
467      RemovePred(SU, Pred);
468      AddPred(NewSU, Pred);
469    }
470    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
471      SDep D = NodeSuccs[i];
472      SUnit *SuccDep = D.getSUnit();
473      D.setSUnit(SU);
474      RemovePred(SuccDep, D);
475      D.setSUnit(NewSU);
476      AddPred(SuccDep, D);
477    }
478    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
479      SDep D = ChainSuccs[i];
480      SUnit *SuccDep = D.getSUnit();
481      D.setSUnit(SU);
482      RemovePred(SuccDep, D);
483      if (isNewLoad) {
484        D.setSUnit(LoadSU);
485        AddPred(SuccDep, D);
486      }
487    }
488
489    // Add a data dependency to reflect that NewSU reads the value defined
490    // by LoadSU.
491    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
492
493    if (isNewLoad)
494      AvailableQueue->addNode(LoadSU);
495    AvailableQueue->addNode(NewSU);
496
497    ++NumUnfolds;
498
499    if (NewSU->NumSuccsLeft == 0) {
500      NewSU->isAvailable = true;
501      return NewSU;
502    }
503    SU = NewSU;
504  }
505
506  DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
507  NewSU = CreateClone(SU);
508
509  // New SUnit has the exact same predecessors.
510  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
511       I != E; ++I)
512    if (!I->isArtificial())
513      AddPred(NewSU, *I);
514
515  // Only copy scheduled successors. Cut them from old node's successor
516  // list and move them over.
517  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
518  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
519       I != E; ++I) {
520    if (I->isArtificial())
521      continue;
522    SUnit *SuccSU = I->getSUnit();
523    if (SuccSU->isScheduled) {
524      SDep D = *I;
525      D.setSUnit(NewSU);
526      AddPred(SuccSU, D);
527      D.setSUnit(SU);
528      DelDeps.push_back(std::make_pair(SuccSU, D));
529    }
530  }
531  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
532    RemovePred(DelDeps[i].first, DelDeps[i].second);
533
534  AvailableQueue->updateNode(SU);
535  AvailableQueue->addNode(NewSU);
536
537  ++NumDups;
538  return NewSU;
539}
540
541/// InsertCopiesAndMoveSuccs - Insert register copies and move all
542/// scheduled successors of the given SUnit to the last copy.
543void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
544                                               const TargetRegisterClass *DestRC,
545                                               const TargetRegisterClass *SrcRC,
546                                               SmallVector<SUnit*, 2> &Copies) {
547  SUnit *CopyFromSU = CreateNewSUnit(NULL);
548  CopyFromSU->CopySrcRC = SrcRC;
549  CopyFromSU->CopyDstRC = DestRC;
550
551  SUnit *CopyToSU = CreateNewSUnit(NULL);
552  CopyToSU->CopySrcRC = DestRC;
553  CopyToSU->CopyDstRC = SrcRC;
554
555  // Only copy scheduled successors. Cut them from old node's successor
556  // list and move them over.
557  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
558  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
559       I != E; ++I) {
560    if (I->isArtificial())
561      continue;
562    SUnit *SuccSU = I->getSUnit();
563    if (SuccSU->isScheduled) {
564      SDep D = *I;
565      D.setSUnit(CopyToSU);
566      AddPred(SuccSU, D);
567      DelDeps.push_back(std::make_pair(SuccSU, *I));
568    }
569  }
570  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
571    RemovePred(DelDeps[i].first, DelDeps[i].second);
572
573  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
574  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
575
576  AvailableQueue->updateNode(SU);
577  AvailableQueue->addNode(CopyFromSU);
578  AvailableQueue->addNode(CopyToSU);
579  Copies.push_back(CopyFromSU);
580  Copies.push_back(CopyToSU);
581
582  ++NumPRCopies;
583}
584
585/// getPhysicalRegisterVT - Returns the ValueType of the physical register
586/// definition of the specified node.
587/// FIXME: Move to SelectionDAG?
588static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
589                                 const TargetInstrInfo *TII) {
590  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
591  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
592  unsigned NumRes = TID.getNumDefs();
593  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
594    if (Reg == *ImpDef)
595      break;
596    ++NumRes;
597  }
598  return N->getValueType(NumRes);
599}
600
601/// CheckForLiveRegDef - Return true and update live register vector if the
602/// specified register def of the specified SUnit clobbers any "live" registers.
603static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
604                               std::vector<SUnit*> &LiveRegDefs,
605                               SmallSet<unsigned, 4> &RegAdded,
606                               SmallVector<unsigned, 4> &LRegs,
607                               const TargetRegisterInfo *TRI) {
608  bool Added = false;
609  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
610    if (RegAdded.insert(Reg)) {
611      LRegs.push_back(Reg);
612      Added = true;
613    }
614  }
615  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
616    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
617      if (RegAdded.insert(*Alias)) {
618        LRegs.push_back(*Alias);
619        Added = true;
620      }
621    }
622  return Added;
623}
624
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
/// Every clobbered live register is appended to LRegs.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  // Fast path: nothing is live, so nothing can be clobbered.
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep())
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  // Walk the flagged-node chain checking each node's register defs.
  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
        --NumOps;  // Ignore the flag operand.

      // NOTE(review): i starts at 2, skipping the leading non-group operands
      // (presumably the chain and the asm string) — confirm against the
      // inline-asm operand encoding at this revision.
      for (unsigned i = 2; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        // Low 3 bits of the flag word encode the operand kind; the bits
        // above them hold the number of values in the group.
        unsigned NumVals = (Flags & 0xffff) >> 3;

        ++i; // Skip the ID value.
        if ((Flags & 7) == 2 || (Flags & 7) == 6) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    // Ordinary machine instructions: check their implicit defs.
    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }
  return !LRegs.empty();
}
679
680
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue->pop();
    // Pop candidates until one is found that doesn't clobber a live
    // physical register; rejected candidates are parked in NotReady with
    // their offending registers recorded in LRegsMap.
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU))  {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors has been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. If it's too expensive to copy the value, then try
        // duplicate the nodes that produces these "too expensive to copy"
        // values to break the dependency. In case even that doesn't work,
        // insert cross class copies.
        // If it's not too expensive, i.e. cost != -1, issue copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
        const TargetRegisterClass *RC =
          TRI->getPhysicalRegisterRegClass(Reg, VT);
        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

        // If cross copy register class is null, then it must be possible copy
        // the value directly. Do not try duplicate the def.
        SUnit *NewDef = 0;
        if (DestRC)
          NewDef = CopyAndMoveSuccessors(LRDef);
        else
          DestRC = RC;
        if (!NewDef) {
          // Issue copies, these can be expensive cross register class copies.
          SmallVector<SUnit*, 2> Copies;
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
                       << " to SU #" << Copies.front()->NodeNum << "\n");
          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false,
                              /*isArtificial=*/true));
          NewDef = Copies.back();
        }

        DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
                     << " to SU #" << TrySU->NodeNum << "\n");
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isNormalMemory=*/false,
                             /*isMustAlias=*/false,
                             /*isArtificial=*/true));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      assert(CurSU && "Unable to resolve live physical register dependencies!");
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}
831
832//===----------------------------------------------------------------------===//
833//  Top-Down Scheduling
834//===----------------------------------------------------------------------===//
835
836/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
837/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
838void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
839  SUnit *SuccSU = SuccEdge->getSUnit();
840
841#ifndef NDEBUG
842  if (SuccSU->NumPredsLeft == 0) {
843    dbgs() << "*** Scheduling failed! ***\n";
844    SuccSU->dump(this);
845    dbgs() << " has been released too many times!\n";
846    llvm_unreachable(0);
847  }
848#endif
849  --SuccSU->NumPredsLeft;
850
851  // If all the node's predecessors are scheduled, this node is ready
852  // to be scheduled. Ignore the special ExitSU node.
853  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
854    SuccSU->isAvailable = true;
855    AvailableQueue->push(SuccSU);
856  }
857}
858
859void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
860  // Top down: release successors
861  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
862       I != E; ++I) {
863    assert(!I->isAssignedRegDep() &&
864           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
865
866    ReleaseSucc(SU, &*I);
867  }
868}
869
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  // A node may never be scheduled earlier than its depth allows; record the
  // actual cycle it was issued in.
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  // Release successors first, then mark SU scheduled and let the priority
  // queue update any cached state for it.
  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}
885
886/// ListScheduleTopDown - The main loop of list scheduling for top-down
887/// schedulers.
888void ScheduleDAGRRList::ListScheduleTopDown() {
889  unsigned CurCycle = 0;
890
891  // Release any successors of the special Entry node.
892  ReleaseSuccessors(&EntrySU);
893
894  // All leaves to Available queue.
895  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
896    // It is available if it has no predecessors.
897    if (SUnits[i].Preds.empty()) {
898      AvailableQueue->push(&SUnits[i]);
899      SUnits[i].isAvailable = true;
900    }
901  }
902
903  // While Available queue is not empty, grab the node with the highest
904  // priority. If it is not ready put it back.  Schedule the node.
905  Sequence.reserve(SUnits.size());
906  while (!AvailableQueue->empty()) {
907    SUnit *CurSU = AvailableQueue->pop();
908
909    if (CurSU)
910      ScheduleNodeTopDown(CurSU, CurCycle);
911    ++CurCycle;
912  }
913
914#ifndef NDEBUG
915  VerifySchedule(isBottomUp);
916#endif
917}
918
919
920//===----------------------------------------------------------------------===//
921//                RegReductionPriorityQueue Implementation
922//===----------------------------------------------------------------------===//
923//
924// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
925// to reduce register pressure.
926//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.  Each functor keeps a
  /// back-pointer (SPQ) to the queue that owns it so operator() can query
  /// per-node priority data; the operator() bodies are defined further down
  /// in this file.
  // bu_ls_rr_sort - comparator for the bottom-up (list-burr) scheduler.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // td_ls_rr_sort - comparator for the top-down (list-tdrr) scheduler.
  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // src_ls_rr_sort - comparator that prefers source order when available,
  // falling back to the bottom-up heuristics.
  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
      : SPQ(spq) {}
    src_ls_rr_sort(const src_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace
958
959/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
960/// Smaller number is the higher priority.
961static unsigned
962CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
963  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
964  if (SethiUllmanNumber != 0)
965    return SethiUllmanNumber;
966
967  unsigned Extra = 0;
968  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
969       I != E; ++I) {
970    if (I->isCtrl()) continue;  // ignore chain preds
971    SUnit *PredSU = I->getSUnit();
972    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
973    if (PredSethiUllman > SethiUllmanNumber) {
974      SethiUllmanNumber = PredSethiUllman;
975      Extra = 0;
976    } else if (PredSethiUllman == SethiUllmanNumber)
977      ++Extra;
978  }
979
980  SethiUllmanNumber += Extra;
981
982  if (SethiUllmanNumber == 0)
983    SethiUllmanNumber = 1;
984
985  return SethiUllmanNumber;
986}
987
988namespace {
989  template<class SF>
990  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
991    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
992    unsigned currentQueueId;
993
994  protected:
995    // SUnits - The SUnits for the current graph.
996    std::vector<SUnit> *SUnits;
997
998    const TargetInstrInfo *TII;
999    const TargetRegisterInfo *TRI;
1000    ScheduleDAGRRList *scheduleDAG;
1001
1002    // SethiUllmanNumbers - The SethiUllman number for each node.
1003    std::vector<unsigned> SethiUllmanNumbers;
1004
1005  public:
1006    RegReductionPriorityQueue(const TargetInstrInfo *tii,
1007                              const TargetRegisterInfo *tri)
1008      : Queue(SF(this)), currentQueueId(0),
1009        TII(tii), TRI(tri), scheduleDAG(NULL) {}
1010
1011    void initNodes(std::vector<SUnit> &sunits) {
1012      SUnits = &sunits;
1013      // Add pseudo dependency edges for two-address nodes.
1014      AddPseudoTwoAddrDeps();
1015      // Reroute edges to nodes with multiple uses.
1016      PrescheduleNodesWithMultipleUses();
1017      // Calculate node priorities.
1018      CalculateSethiUllmanNumbers();
1019    }
1020
1021    void addNode(const SUnit *SU) {
1022      unsigned SUSize = SethiUllmanNumbers.size();
1023      if (SUnits->size() > SUSize)
1024        SethiUllmanNumbers.resize(SUSize*2, 0);
1025      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1026    }
1027
1028    void updateNode(const SUnit *SU) {
1029      SethiUllmanNumbers[SU->NodeNum] = 0;
1030      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1031    }
1032
1033    void releaseState() {
1034      SUnits = 0;
1035      SethiUllmanNumbers.clear();
1036    }
1037
1038    unsigned getNodePriority(const SUnit *SU) const {
1039      assert(SU->NodeNum < SethiUllmanNumbers.size());
1040      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1041      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1042        // CopyToReg should be close to its uses to facilitate coalescing and
1043        // avoid spilling.
1044        return 0;
1045      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1046          Opc == TargetOpcode::SUBREG_TO_REG ||
1047          Opc == TargetOpcode::INSERT_SUBREG)
1048        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1049        // close to their uses to facilitate coalescing.
1050        return 0;
1051      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1052        // If SU does not have a register use, i.e. it doesn't produce a value
1053        // that would be consumed (e.g. store), then it terminates a chain of
1054        // computation.  Give it a large SethiUllman number so it will be
1055        // scheduled right before its predecessors that it doesn't lengthen
1056        // their live ranges.
1057        return 0xffff;
1058      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1059        // If SU does not have a register def, schedule it close to its uses
1060        // because it does not lengthen any live ranges.
1061        return 0;
1062      return SethiUllmanNumbers[SU->NodeNum];
1063    }
1064
1065    unsigned getNodeOrdering(const SUnit *SU) const {
1066      return scheduleDAG->DAG->GetOrdering(SU->getNode());
1067    }
1068
1069    unsigned size() const { return Queue.size(); }
1070
1071    bool empty() const { return Queue.empty(); }
1072
1073    void push(SUnit *U) {
1074      assert(!U->NodeQueueId && "Node in the queue already");
1075      U->NodeQueueId = ++currentQueueId;
1076      Queue.push(U);
1077    }
1078
1079    void push_all(const std::vector<SUnit *> &Nodes) {
1080      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
1081        push(Nodes[i]);
1082    }
1083
1084    SUnit *pop() {
1085      if (empty()) return NULL;
1086      SUnit *V = Queue.top();
1087      Queue.pop();
1088      V->NodeQueueId = 0;
1089      return V;
1090    }
1091
1092    void remove(SUnit *SU) {
1093      assert(!Queue.empty() && "Queue is empty!");
1094      assert(SU->NodeQueueId != 0 && "Not in queue!");
1095      Queue.erase_one(SU);
1096      SU->NodeQueueId = 0;
1097    }
1098
1099    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1100      scheduleDAG = scheduleDag;
1101    }
1102
1103  protected:
1104    bool canClobber(const SUnit *SU, const SUnit *Op);
1105    void AddPseudoTwoAddrDeps();
1106    void PrescheduleNodesWithMultipleUses();
1107    void CalculateSethiUllmanNumbers();
1108  };
1109
1110  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1111    BURegReductionPriorityQueue;
1112
1113  typedef RegReductionPriorityQueue<td_ls_rr_sort>
1114    TDRegReductionPriorityQueue;
1115
1116  typedef RegReductionPriorityQueue<src_ls_rr_sort>
1117    SrcRegReductionPriorityQueue;
1118}
1119
1120/// closestSucc - Returns the scheduled cycle of the successor which is
1121/// closest to the current cycle.
1122static unsigned closestSucc(const SUnit *SU) {
1123  unsigned MaxHeight = 0;
1124  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1125       I != E; ++I) {
1126    if (I->isCtrl()) continue;  // ignore chain succs
1127    unsigned Height = I->getSUnit()->getHeight();
1128    // If there are bunch of CopyToRegs stacked up, they should be considered
1129    // to be at the same position.
1130    if (I->getSUnit()->getNode() &&
1131        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1132      Height = closestSucc(I->getSUnit())+1;
1133    if (Height > MaxHeight)
1134      MaxHeight = Height;
1135  }
1136  return MaxHeight;
1137}
1138
1139/// calcMaxScratches - Returns an cost estimate of the worse case requirement
1140/// for scratch registers, i.e. number of data dependencies.
1141static unsigned calcMaxScratches(const SUnit *SU) {
1142  unsigned Scratches = 0;
1143  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1144       I != E; ++I) {
1145    if (I->isCtrl()) continue;  // ignore chain preds
1146    Scratches++;
1147  }
1148  return Scratches;
1149}
1150
1151template <typename RRSort>
1152static bool BURRSort(const SUnit *left, const SUnit *right,
1153                     const RegReductionPriorityQueue<RRSort> *SPQ) {
1154  unsigned LPriority = SPQ->getNodePriority(left);
1155  unsigned RPriority = SPQ->getNodePriority(right);
1156  if (LPriority != RPriority)
1157    return LPriority > RPriority;
1158
1159  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
1160  // e.g.
1161  // t1 = op t2, c1
1162  // t3 = op t4, c2
1163  //
1164  // and the following instructions are both ready.
1165  // t2 = op c3
1166  // t4 = op c4
1167  //
1168  // Then schedule t2 = op first.
1169  // i.e.
1170  // t4 = op c4
1171  // t2 = op c3
1172  // t1 = op t2, c1
1173  // t3 = op t4, c2
1174  //
1175  // This creates more short live intervals.
1176  unsigned LDist = closestSucc(left);
1177  unsigned RDist = closestSucc(right);
1178  if (LDist != RDist)
1179    return LDist < RDist;
1180
1181  // How many registers becomes live when the node is scheduled.
1182  unsigned LScratch = calcMaxScratches(left);
1183  unsigned RScratch = calcMaxScratches(right);
1184  if (LScratch != RScratch)
1185    return LScratch > RScratch;
1186
1187  if (left->getHeight() != right->getHeight())
1188    return left->getHeight() > right->getHeight();
1189
1190  if (left->getDepth() != right->getDepth())
1191    return left->getDepth() < right->getDepth();
1192
1193  assert(left->NodeQueueId && right->NodeQueueId &&
1194         "NodeQueueId cannot be zero");
1195  return (left->NodeQueueId > right->NodeQueueId);
1196}
1197
// Bottom up
// Plain bottom-up register-reduction ordering: defer entirely to BURRSort.
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  return BURRSort(left, right, SPQ);
}
1202
1203// Source order, otherwise bottom up.
1204bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
1205  unsigned LOrder = SPQ->getNodeOrdering(left);
1206  unsigned ROrder = SPQ->getNodeOrdering(right);
1207
1208  // Prefer an ordering where the lower the non-zero order number, the higher
1209  // the preference.
1210  if ((LOrder || ROrder) && LOrder != ROrder)
1211    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
1212
1213  return BURRSort(left, right, SPQ);
1214}
1215
/// canClobber - Return true if SU is a two-address instruction with a tied
/// def&use operand whose value is produced by the SUnit that Op originated
/// from, i.e. scheduling SU clobbers the register Op defines.
template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    // Walk the use operands; use operand i is MI operand i+NumRes.
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        // A NodeId of -1 means the operand node has no SUnit of its own.
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}
1235
1236/// hasCopyToRegUse - Return true if SU has a value successor that is a
1237/// CopyToReg node.
1238static bool hasCopyToRegUse(const SUnit *SU) {
1239  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1240       I != E; ++I) {
1241    if (I->isCtrl()) continue;
1242    const SUnit *SuccSU = I->getSUnit();
1243    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
1244      return true;
1245  }
1246  return false;
1247}
1248
/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  // Walk SU's flag-glued node chain; a clobber exists if any member's
  // implicit defs overlap one of SuccSU's live implicit-def results.
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    // Check each result of SuccSU's node beyond its explicit defs; result i
    // (for i >= NumDefs) corresponds to implicit def ImpDefs[i - NumDefs].
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Flag || VT == MVT::Other)
        continue;  // flag/chain results don't occupy a register
      if (!N->hasAnyUseOfValue(i))
        continue;  // unused results can't be clobbered meaningfully
      unsigned Reg = ImpDefs[i - NumDefs];
      // NOTE(review): SUImpDefs is advanced here and never reset, so the
      // second and later values of i scan only the remainder of the
      // implicit-def list — confirm this is intentional and not an
      // accidental early termination of the overlap check.
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}
1282
/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
/// ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
                 << " next to PredSU # " << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    // Reroute every PredSU->SuccSU edge (other than the one to SU) so that
    // it runs through SU instead.  RemovePred appears to shrink
    // PredSU->Succs as a side effect, hence the --i to revisit index i.
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}
1399
/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand. Add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    // Skip SUnits with no machine node, and flag-glued node sequences.
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      // Only operands tied to a def are interesting.
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      // A NodeId of -1 means the operand has no SUnit of its own.
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      // Examine every other use of the shared (tied) value.
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        // Add the artificial edge only when the favoring heuristics from the
        // function comment apply and it cannot create a cycle.
        if ((!canClobber(SuccSU, DUSU) ||
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}
1481
1482/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1483/// scheduling units.
1484template<class SF>
1485void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1486  SethiUllmanNumbers.assign(SUnits->size(), 0);
1487
1488  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1489    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1490}
1491
1492/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
1493/// predecessors of the successors of the SUnit SU. Stop when the provided
1494/// limit is exceeded.
1495static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1496                                                    unsigned Limit) {
1497  unsigned Sum = 0;
1498  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1499       I != E; ++I) {
1500    const SUnit *SuccSU = I->getSUnit();
1501    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1502         EE = SuccSU->Preds.end(); II != EE; ++II) {
1503      SUnit *PredSU = II->getSUnit();
1504      if (!PredSU->isScheduled)
1505        if (++Sum > Limit)
1506          return Sum;
1507    }
1508  }
1509  return Sum;
1510}
1511
1512
// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  // A "floater" is a target instruction with no predecessors at all.
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  // Bonus when the node's successors have exactly one unscheduled
  // predecessor in total (i.e. scheduling this node readies them).
  // NOTE(review): the bonuses are unsigned, so the "-= 2" below wraps when
  // the bonus is 0; the later sums then compare modulo 2^32 — confirm this
  // is the intended arithmetic.
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  // Nodes with no data successors sort after nodes that have some.
  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  // Penalize floaters; boost nodes with a single data successor.
  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  // Compare bonus-adjusted priorities, then depth, then remaining
  // successor count, and finally queue insertion order as a strict tiebreak.
  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}
1551
1552//===----------------------------------------------------------------------===//
1553//                         Public Constructor Functions
1554//===----------------------------------------------------------------------===//
1555
1556llvm::ScheduleDAGSDNodes *
1557llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1558  const TargetMachine &TM = IS->TM;
1559  const TargetInstrInfo *TII = TM.getInstrInfo();
1560  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1561
1562  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
1563
1564  ScheduleDAGRRList *SD =
1565    new ScheduleDAGRRList(*IS->MF, true, PQ);
1566  PQ->setScheduleDAG(SD);
1567  return SD;
1568}
1569
1570llvm::ScheduleDAGSDNodes *
1571llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1572  const TargetMachine &TM = IS->TM;
1573  const TargetInstrInfo *TII = TM.getInstrInfo();
1574  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1575
1576  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
1577
1578  ScheduleDAGRRList *SD =
1579    new ScheduleDAGRRList(*IS->MF, false, PQ);
1580  PQ->setScheduleDAG(SD);
1581  return SD;
1582}
1583
1584llvm::ScheduleDAGSDNodes *
1585llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1586  const TargetMachine &TM = IS->TM;
1587  const TargetInstrInfo *TII = TM.getInstrInfo();
1588  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1589
1590  SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
1591
1592  ScheduleDAGRRList *SD =
1593    new ScheduleDAGRRList(*IS->MF, true, PQ);
1594  PQ->setScheduleDAG(SD);
1595  return SD;
1596}
1597