ScheduleDAGRRList.cpp revision b42df31c2d72b3cfe7f04fad4aa3f57790272b95
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
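
// A rough sketch of the bottom-up variant implemented below (the real loop
// additionally tracks live physical registers and may backtrack or clone
// nodes to resolve conflicts):
//
//   while (!AvailableQueue->empty()) {
//     SUnit *SU = AvailableQueue->pop();      // highest-priority ready node
//     if (SU)                                 // may be null if every
//       ScheduleNodeBottomUp(SU, CurCycle);   // candidate had to be delayed
//     ++CurCycle;
//   }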
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/Target/TargetRegisterInfo.h"
24#include "llvm/Target/TargetData.h"
25#include "llvm/Target/TargetMachine.h"
26#include "llvm/Target/TargetInstrInfo.h"
27#include "llvm/Target/TargetLowering.h"
28#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/raw_ostream.h"
34#include <climits>
35using namespace llvm;
36
37STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
38STATISTIC(NumUnfolds,    "Number of nodes unfolded");
39STATISTIC(NumDups,       "Number of duplicated nodes");
40STATISTIC(NumPRCopies,   "Number of physical register copies");
41
42static RegisterScheduler
43  burrListDAGScheduler("list-burr",
44                       "Bottom-up register reduction list scheduling",
45                       createBURRListDAGScheduler);
46static RegisterScheduler
47  tdrListrDAGScheduler("list-tdrr",
48                       "Top-down register reduction list scheduling",
49                       createTDRRListDAGScheduler);
50static RegisterScheduler
51  sourceListDAGScheduler("source",
52                         "Similar to list-burr but schedules in source "
53                         "order when possible",
54                         createSourceListDAGScheduler);
55
56static RegisterScheduler
57  hybridListDAGScheduler("list-hybrid",
58                         "Bottom-up register pressure aware list scheduling "
59                         "which tries to balance latency and register pressure",
60                         createHybridListDAGScheduler);
61
62static RegisterScheduler
63  ILPListDAGScheduler("list-ilp",
64                      "Bottom-up register pressure aware list scheduling "
65                      "which tries to balance ILP and register pressure",
66                      createILPListDAGScheduler);
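
// Each of these schedulers can be selected by name on the command line,
// typically via llc's -pre-RA-sched option, e.g.:
//
//   llc -pre-RA-sched=list-burr foo.ll
//   llc -pre-RA-sched=list-ilp foo.ll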
67
68namespace {
69//===----------------------------------------------------------------------===//
70/// ScheduleDAGRRList - The actual register reduction list scheduler
71/// implementation.  This supports both top-down and bottom-up scheduling.
72///
73class ScheduleDAGRRList : public ScheduleDAGSDNodes {
74private:
75  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
76  /// it is top-down.
77  bool isBottomUp;
78
79  /// NeedLatency - True if the scheduler will make use of latency information.
80  ///
81  bool NeedLatency;
82
83  /// AvailableQueue - The priority queue to use for the available SUnits.
84  SchedulingPriorityQueue *AvailableQueue;
85
86  /// LiveRegDefs - A set of physical registers and their definitions
87  /// that are "live". These nodes must be scheduled before any other node
88  /// that modifies the registers can be scheduled.
89  unsigned NumLiveRegs;
90  std::vector<SUnit*> LiveRegDefs;
91  std::vector<unsigned> LiveRegCycles;
92
93  /// Topo - A topological ordering for SUnits which permits fast IsReachable
94  /// and similar queries.
95  ScheduleDAGTopologicalSort Topo;
96
97public:
98  ScheduleDAGRRList(MachineFunction &mf,
99                    bool isbottomup, bool needlatency,
100                    SchedulingPriorityQueue *availqueue)
101    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
102      AvailableQueue(availqueue), Topo(SUnits) {
103    }
104
105  ~ScheduleDAGRRList() {
106    delete AvailableQueue;
107  }
108
109  void Schedule();
110
111  /// IsReachable - Checks if SU is reachable from TargetSU.
112  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
113    return Topo.IsReachable(SU, TargetSU);
114  }
115
116  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
117  /// create a cycle.
118  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
119    return Topo.WillCreateCycle(SU, TargetSU);
120  }
121
122  /// AddPred - adds a predecessor edge to SUnit SU.
123  /// Updates the topological ordering if required.
124  ///
125  void AddPred(SUnit *SU, const SDep &D) {
126    Topo.AddPred(SU, D.getSUnit());
127    SU->addPred(D);
128  }
129
130  /// RemovePred - removes a predecessor edge from SUnit SU.
131  /// Updates the topological ordering if required.
132  ///
133  void RemovePred(SUnit *SU, const SDep &D) {
134    Topo.RemovePred(SU, D.getSUnit());
135    SU->removePred(D);
136  }
137
138private:
139  void ReleasePred(SUnit *SU, const SDep *PredEdge);
140  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
141  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
142  void ReleaseSuccessors(SUnit *SU);
143  void CapturePred(SDep *PredEdge);
144  void ScheduleNodeBottomUp(SUnit*, unsigned);
145  void ScheduleNodeTopDown(SUnit*, unsigned);
146  void UnscheduleNodeBottomUp(SUnit*);
147  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
148  SUnit *CopyAndMoveSuccessors(SUnit*);
149  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
150                                const TargetRegisterClass*,
151                                const TargetRegisterClass*,
152                                SmallVector<SUnit*, 2>&);
153  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
154  void ListScheduleTopDown();
155  void ListScheduleBottomUp();
156
157
158  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
159  /// Updates the topological ordering if required.
160  SUnit *CreateNewSUnit(SDNode *N) {
161    unsigned NumSUnits = SUnits.size();
162    SUnit *NewNode = NewSUnit(N);
163    // Update the topological ordering.
164    if (NewNode->NodeNum >= NumSUnits)
165      Topo.InitDAGTopologicalSorting();
166    return NewNode;
167  }
168
169  /// CreateClone - Creates a new SUnit from an existing one.
170  /// Updates the topological ordering if required.
171  SUnit *CreateClone(SUnit *N) {
172    unsigned NumSUnits = SUnits.size();
173    SUnit *NewNode = Clone(N);
174    // Update the topological ordering.
175    if (NewNode->NodeNum >= NumSUnits)
176      Topo.InitDAGTopologicalSorting();
177    return NewNode;
178  }
179
180  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
181  /// need actual latency information but the hybrid scheduler does.
182  bool ForceUnitLatencies() const {
183    return !NeedLatency;
184  }
185};
186}  // end anonymous namespace
187
188
189/// Schedule - Schedule the DAG using list scheduling.
190void ScheduleDAGRRList::Schedule() {
191  DEBUG(dbgs()
192        << "********** List Scheduling BB#" << BB->getNumber()
193        << " **********\n");
194
195  NumLiveRegs = 0;
196  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
197  LiveRegCycles.resize(TRI->getNumRegs(), 0);
198
199  // Build the scheduling graph.
200  BuildSchedGraph(NULL);
201
202  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
203          SUnits[su].dumpAll(this));
204  Topo.InitDAGTopologicalSorting();
205
206  AvailableQueue->initNodes(SUnits);
207
208  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
209  if (isBottomUp)
210    ListScheduleBottomUp();
211  else
212    ListScheduleTopDown();
213
214  AvailableQueue->releaseState();
215}
216
217//===----------------------------------------------------------------------===//
218//  Bottom-Up Scheduling
219//===----------------------------------------------------------------------===//
220
221/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
222/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
223void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
224  SUnit *PredSU = PredEdge->getSUnit();
225
226#ifndef NDEBUG
227  if (PredSU->NumSuccsLeft == 0) {
228    dbgs() << "*** Scheduling failed! ***\n";
229    PredSU->dump(this);
230    dbgs() << " has been released too many times!\n";
231    llvm_unreachable(0);
232  }
233#endif
234  --PredSU->NumSuccsLeft;
235
236  if (!ForceUnitLatencies()) {
237    // Updating predecessor's height. This is now the cycle when the
238    // predecessor can be scheduled without causing a pipeline stall.
239    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
240  }
241
242  // If all the node's successors are scheduled, this node is ready
243  // to be scheduled. Ignore the special EntrySU node.
244  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
245    PredSU->isAvailable = true;
246    AvailableQueue->push(PredSU);
247  }
248}
249
250void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
251  // Bottom up: release predecessors
252  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
253       I != E; ++I) {
254    ReleasePred(SU, &*I);
255    if (I->isAssignedRegDep()) {
256      // This is a physical register dependency and it's impossible or
257      // expensive to copy the register. Make sure nothing that can
258      // clobber the register is scheduled between the predecessor and
259      // this node.
260      if (!LiveRegDefs[I->getReg()]) {
261        ++NumLiveRegs;
262        LiveRegDefs[I->getReg()] = I->getSUnit();
263        LiveRegCycles[I->getReg()] = CurCycle;
264      }
265    }
266  }
267}
268
269/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
270/// count of its predecessors. If a predecessor pending count is zero, add it to
271/// the Available queue.
272void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
273  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
274  DEBUG(SU->dump(this));
275
276#ifndef NDEBUG
277  if (CurCycle < SU->getHeight())
278    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
279#endif
280
281  // FIXME: Handle noop hazard.
282  SU->setHeightToAtLeast(CurCycle);
283  Sequence.push_back(SU);
284
285  AvailableQueue->ScheduledNode(SU);
286
287  ReleasePredecessors(SU, CurCycle);
288
289  // Release all the implicit physical register defs that are live.
290  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
291       I != E; ++I) {
292    if (I->isAssignedRegDep()) {
293      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
294        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
295        assert(LiveRegDefs[I->getReg()] == SU &&
296               "Physical register dependency violated?");
297        --NumLiveRegs;
298        LiveRegDefs[I->getReg()] = NULL;
299        LiveRegCycles[I->getReg()] = 0;
300      }
301    }
302  }
303
304  SU->isScheduled = true;
305}
306
307/// CapturePred - This does the opposite of ReleasePred. Since SU is being
308/// unscheduled, increase the succ left count of its predecessors. Remove
309/// them from AvailableQueue if necessary.
310void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
311  SUnit *PredSU = PredEdge->getSUnit();
312  if (PredSU->isAvailable) {
313    PredSU->isAvailable = false;
314    if (!PredSU->isPending)
315      AvailableQueue->remove(PredSU);
316  }
317
318  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
319  ++PredSU->NumSuccsLeft;
320}
321
322/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
323/// state and its predecessors' states to reflect the change.
324void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
325  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
326  DEBUG(SU->dump(this));
327
328  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
329       I != E; ++I) {
330    CapturePred(&*I);
331    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
332      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
333      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
334             "Physical register dependency violated?");
335      --NumLiveRegs;
336      LiveRegDefs[I->getReg()] = NULL;
337      LiveRegCycles[I->getReg()] = 0;
338    }
339  }
340
341  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
342       I != E; ++I) {
343    if (I->isAssignedRegDep()) {
344      if (!LiveRegDefs[I->getReg()]) {
345        LiveRegDefs[I->getReg()] = SU;
346        ++NumLiveRegs;
347      }
348      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
349        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
350    }
351  }
352
353  SU->setHeightDirty();
354  SU->isScheduled = false;
355  SU->isAvailable = true;
356  AvailableQueue->push(SU);
357  AvailableQueue->UnscheduledNode(SU);
358}
359
360/// BacktrackBottomUp - Backtrack scheduling to the earlier cycle specified by
361/// BtCycle in order to schedule a specific node.
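/// For example, if the node that must be scheduled depends on a physical
/// register whose def became live at cycle BtCycle, every node issued after
/// BtCycle is popped off the sequence and unscheduled (see
/// UnscheduleNodeBottomUp) so that the dependent node can be scheduled first.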
362void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
363                                          unsigned &CurCycle) {
364  SUnit *OldSU = NULL;
365  while (CurCycle > BtCycle) {
366    OldSU = Sequence.back();
367    Sequence.pop_back();
368    if (SU->isSucc(OldSU))
369      // Don't try to remove SU from AvailableQueue.
370      SU->isAvailable = false;
371    UnscheduleNodeBottomUp(OldSU);
372    --CurCycle;
373    AvailableQueue->setCurCycle(CurCycle);
374  }
375
376  assert(!SU->isSucc(OldSU) && "Something is wrong!");
377
378  ++NumBacktracks;
379}
380
381static bool isOperandOf(const SUnit *SU, SDNode *N) {
382  for (const SDNode *SUNode = SU->getNode(); SUNode;
383       SUNode = SUNode->getFlaggedNode()) {
384    if (SUNode->isOperandOf(N))
385      return true;
386  }
387  return false;
388}
389
390/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
391/// successors to the newly created node.
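/// For example (X86-style pseudo code), a load-folding node such as
///   t1 = ADD32rm t0, [mem]
/// may first be unfolded into
///   t2 = MOV32rm [mem]
///   t1 = ADD32rr t0, t2
/// so that the load and its use can be scheduled, and if necessary cloned,
/// independently.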
392SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
393  if (SU->getNode()->getFlaggedNode())
394    return NULL;
395
396  SDNode *N = SU->getNode();
397  if (!N)
398    return NULL;
399
400  SUnit *NewSU;
401  bool TryUnfold = false;
402  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
403    EVT VT = N->getValueType(i);
404    if (VT == MVT::Flag)
405      return NULL;
406    else if (VT == MVT::Other)
407      TryUnfold = true;
408  }
409  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
410    const SDValue &Op = N->getOperand(i);
411    EVT VT = Op.getNode()->getValueType(Op.getResNo());
412    if (VT == MVT::Flag)
413      return NULL;
414  }
415
416  if (TryUnfold) {
417    SmallVector<SDNode*, 2> NewNodes;
418    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
419      return NULL;
420
421    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
422    assert(NewNodes.size() == 2 && "Expected a load folding node!");
423
424    N = NewNodes[1];
425    SDNode *LoadNode = NewNodes[0];
426    unsigned NumVals = N->getNumValues();
427    unsigned OldNumVals = SU->getNode()->getNumValues();
428    for (unsigned i = 0; i != NumVals; ++i)
429      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
430    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
431                                   SDValue(LoadNode, 1));
432
433    // LoadNode may already exist. This can happen when there is another
434    // load from the same location that produces the same type of value
435    // but has a different alignment or volatility.
436    bool isNewLoad = true;
437    SUnit *LoadSU;
438    if (LoadNode->getNodeId() != -1) {
439      LoadSU = &SUnits[LoadNode->getNodeId()];
440      isNewLoad = false;
441    } else {
442      LoadSU = CreateNewSUnit(LoadNode);
443      LoadNode->setNodeId(LoadSU->NodeNum);
444      ComputeLatency(LoadSU);
445    }
446
447    SUnit *NewSU = CreateNewSUnit(N);
448    assert(N->getNodeId() == -1 && "Node already inserted!");
449    N->setNodeId(NewSU->NodeNum);
450
451    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
452    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
453      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
454        NewSU->isTwoAddress = true;
455        break;
456      }
457    }
458    if (TID.isCommutable())
459      NewSU->isCommutable = true;
460    ComputeLatency(NewSU);
461
462    // Record all the edges to and from the old SU, by category.
463    SmallVector<SDep, 4> ChainPreds;
464    SmallVector<SDep, 4> ChainSuccs;
465    SmallVector<SDep, 4> LoadPreds;
466    SmallVector<SDep, 4> NodePreds;
467    SmallVector<SDep, 4> NodeSuccs;
468    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
469         I != E; ++I) {
470      if (I->isCtrl())
471        ChainPreds.push_back(*I);
472      else if (isOperandOf(I->getSUnit(), LoadNode))
473        LoadPreds.push_back(*I);
474      else
475        NodePreds.push_back(*I);
476    }
477    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
478         I != E; ++I) {
479      if (I->isCtrl())
480        ChainSuccs.push_back(*I);
481      else
482        NodeSuccs.push_back(*I);
483    }
484
485    // Now assign edges to the newly-created nodes.
486    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
487      const SDep &Pred = ChainPreds[i];
488      RemovePred(SU, Pred);
489      if (isNewLoad)
490        AddPred(LoadSU, Pred);
491    }
492    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
493      const SDep &Pred = LoadPreds[i];
494      RemovePred(SU, Pred);
495      if (isNewLoad)
496        AddPred(LoadSU, Pred);
497    }
498    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
499      const SDep &Pred = NodePreds[i];
500      RemovePred(SU, Pred);
501      AddPred(NewSU, Pred);
502    }
503    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
504      SDep D = NodeSuccs[i];
505      SUnit *SuccDep = D.getSUnit();
506      D.setSUnit(SU);
507      RemovePred(SuccDep, D);
508      D.setSUnit(NewSU);
509      AddPred(SuccDep, D);
510    }
511    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
512      SDep D = ChainSuccs[i];
513      SUnit *SuccDep = D.getSUnit();
514      D.setSUnit(SU);
515      RemovePred(SuccDep, D);
516      if (isNewLoad) {
517        D.setSUnit(LoadSU);
518        AddPred(SuccDep, D);
519      }
520    }
521
522    // Add a data dependency to reflect that NewSU reads the value defined
523    // by LoadSU.
524    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
525
526    if (isNewLoad)
527      AvailableQueue->addNode(LoadSU);
528    AvailableQueue->addNode(NewSU);
529
530    ++NumUnfolds;
531
532    if (NewSU->NumSuccsLeft == 0) {
533      NewSU->isAvailable = true;
534      return NewSU;
535    }
536    SU = NewSU;
537  }
538
539  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
540  NewSU = CreateClone(SU);
541
542  // New SUnit has the exact same predecessors.
543  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
544       I != E; ++I)
545    if (!I->isArtificial())
546      AddPred(NewSU, *I);
547
548  // Only copy scheduled successors. Cut them from the old node's successor
549  // list and move them over.
550  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
551  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
552       I != E; ++I) {
553    if (I->isArtificial())
554      continue;
555    SUnit *SuccSU = I->getSUnit();
556    if (SuccSU->isScheduled) {
557      SDep D = *I;
558      D.setSUnit(NewSU);
559      AddPred(SuccSU, D);
560      D.setSUnit(SU);
561      DelDeps.push_back(std::make_pair(SuccSU, D));
562    }
563  }
564  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
565    RemovePred(DelDeps[i].first, DelDeps[i].second);
566
567  AvailableQueue->updateNode(SU);
568  AvailableQueue->addNode(NewSU);
569
570  ++NumDups;
571  return NewSU;
572}
573
574/// InsertCopiesAndMoveSuccs - Insert register copies and move all
575/// scheduled successors of the given SUnit to the last copy.
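/// A rough sketch of what this builds, where Reg is the physical register
/// being worked around:
///
///   SU --> CopyFromSU (SrcRC -> DestRC) --> CopyToSU (DestRC -> SrcRC)
///                                               |
///                               SU's already-scheduled successors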
576void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
577                                               const TargetRegisterClass *DestRC,
578                                               const TargetRegisterClass *SrcRC,
579                                               SmallVector<SUnit*, 2> &Copies) {
580  SUnit *CopyFromSU = CreateNewSUnit(NULL);
581  CopyFromSU->CopySrcRC = SrcRC;
582  CopyFromSU->CopyDstRC = DestRC;
583
584  SUnit *CopyToSU = CreateNewSUnit(NULL);
585  CopyToSU->CopySrcRC = DestRC;
586  CopyToSU->CopyDstRC = SrcRC;
587
588  // Only copy scheduled successors. Cut them from the old node's successor
589  // list and move them over.
590  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
591  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
592       I != E; ++I) {
593    if (I->isArtificial())
594      continue;
595    SUnit *SuccSU = I->getSUnit();
596    if (SuccSU->isScheduled) {
597      SDep D = *I;
598      D.setSUnit(CopyToSU);
599      AddPred(SuccSU, D);
600      DelDeps.push_back(std::make_pair(SuccSU, *I));
601    }
602  }
603  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
604    RemovePred(DelDeps[i].first, DelDeps[i].second);
605
606  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
607  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
608
609  AvailableQueue->updateNode(SU);
610  AvailableQueue->addNode(CopyFromSU);
611  AvailableQueue->addNode(CopyToSU);
612  Copies.push_back(CopyFromSU);
613  Copies.push_back(CopyToSU);
614
615  ++NumPRCopies;
616}
617
618/// getPhysicalRegisterVT - Returns the ValueType of the physical register
619/// definition of the specified node.
620/// FIXME: Move to SelectionDAG?
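/// For example, if a node has NumDefs explicit defs and its implicit def list
/// is { R0, R1, R2 }, then the value produced in R1 is result number
/// NumDefs + 1 and its type is N->getValueType(NumDefs + 1).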
621static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
622                                 const TargetInstrInfo *TII) {
623  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
624  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
625  unsigned NumRes = TID.getNumDefs();
626  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
627    if (Reg == *ImpDef)
628      break;
629    ++NumRes;
630  }
631  return N->getValueType(NumRes);
632}
633
634/// CheckForLiveRegDef - Return true and update live register vector if the
635/// specified register def of the specified SUnit clobbers any "live" registers.
636static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
637                               std::vector<SUnit*> &LiveRegDefs,
638                               SmallSet<unsigned, 4> &RegAdded,
639                               SmallVector<unsigned, 4> &LRegs,
640                               const TargetRegisterInfo *TRI) {
641  bool Added = false;
642  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
643    if (RegAdded.insert(Reg)) {
644      LRegs.push_back(Reg);
645      Added = true;
646    }
647  }
648  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
649    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
650      if (RegAdded.insert(*Alias)) {
651        LRegs.push_back(*Alias);
652        Added = true;
653      }
654    }
655  return Added;
656}
657
658/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
659/// scheduling of the given node to satisfy live physical register dependencies.
660/// If the specific node is the last one that's available to schedule, do
661/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
662bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
663                                                 SmallVector<unsigned, 4> &LRegs){
664  if (NumLiveRegs == 0)
665    return false;
666
667  SmallSet<unsigned, 4> RegAdded;
668  // If this node would clobber any "live" register, then it's not ready.
669  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
670       I != E; ++I) {
671    if (I->isAssignedRegDep())
672      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
673                         RegAdded, LRegs, TRI);
674  }
675
676  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
677    if (Node->getOpcode() == ISD::INLINEASM) {
678      // Inline asm can clobber physical defs.
679      unsigned NumOps = Node->getNumOperands();
680      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
681        --NumOps;  // Ignore the flag operand.
682
683      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
684        unsigned Flags =
685          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
686        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
687
688        ++i; // Skip the ID value.
689        if (InlineAsm::isRegDefKind(Flags) ||
690            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
691          // Check for def of register or earlyclobber register.
692          for (; NumVals; --NumVals, ++i) {
693            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
694            if (TargetRegisterInfo::isPhysicalRegister(Reg))
695              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
696          }
697        } else
698          i += NumVals;
699      }
700      continue;
701    }
702
703    if (!Node->isMachineOpcode())
704      continue;
705    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
706    if (!TID.ImplicitDefs)
707      continue;
708    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
709      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
710  }
711  return !LRegs.empty();
712}
713
714
715/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
716/// schedulers.
717void ScheduleDAGRRList::ListScheduleBottomUp() {
718  unsigned CurCycle = 0;
719
720  // Release any predecessors of the special Exit node.
721  ReleasePredecessors(&ExitSU, CurCycle);
722
723  // Add root to Available queue.
724  if (!SUnits.empty()) {
725    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
726    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
727    RootSU->isAvailable = true;
728    AvailableQueue->push(RootSU);
729  }
730
731  // While Available queue is not empty, grab the node with the highest
732  // priority. If it is not ready, put it back. Schedule the node.
733  SmallVector<SUnit*, 4> NotReady;
734  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
735  Sequence.reserve(SUnits.size());
736  while (!AvailableQueue->empty()) {
737    bool Delayed = false;
738    LRegsMap.clear();
739    SUnit *CurSU = AvailableQueue->pop();
740    while (CurSU) {
741      SmallVector<unsigned, 4> LRegs;
742      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
743        break;
744      Delayed = true;
745      LRegsMap.insert(std::make_pair(CurSU, LRegs));
746
747      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
748      NotReady.push_back(CurSU);
749      CurSU = AvailableQueue->pop();
750    }
751
752    // All candidates are delayed due to live physical reg dependencies.
753    // Try backtracking, code duplication, or inserting cross class copies
754    // to resolve it.
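    // For example, the only remaining candidates may all clobber a physical
    // register (say EFLAGS on X86) that an already-scheduled node still
    // depends on; we then either unschedule back to where the register became
    // live, clone its defining node, or copy the value through another
    // register class.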
755    if (Delayed && !CurSU) {
756      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
757        SUnit *TrySU = NotReady[i];
758        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
759
760        // Try unscheduling up to the point where it's safe to schedule
761        // this node.
762        unsigned LiveCycle = CurCycle;
763        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
764          unsigned Reg = LRegs[j];
765          unsigned LCycle = LiveRegCycles[Reg];
766          LiveCycle = std::min(LiveCycle, LCycle);
767        }
768        SUnit *OldSU = Sequence[LiveCycle];
769        if (!WillCreateCycle(TrySU, OldSU))  {
770          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
771          // Force the current node to be scheduled before the node that
772          // requires the physical reg dep.
773          if (OldSU->isAvailable) {
774            OldSU->isAvailable = false;
775            AvailableQueue->remove(OldSU);
776          }
777          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
778                              /*Reg=*/0, /*isNormalMemory=*/false,
779                              /*isMustAlias=*/false, /*isArtificial=*/true));
780          // If one or more successors have been unscheduled, then the current
781          // node is no longer available. Schedule a successor that's now
782          // available instead.
783          if (!TrySU->isAvailable)
784            CurSU = AvailableQueue->pop();
785          else {
786            CurSU = TrySU;
787            TrySU->isPending = false;
788            NotReady.erase(NotReady.begin()+i);
789          }
790          break;
791        }
792      }
793
794      if (!CurSU) {
795        // Can't backtrack. If it's too expensive to copy the value, then try
796        // duplicating the nodes that produce these "too expensive to copy"
797        // values to break the dependency. In case even that doesn't work,
798        // insert cross class copies.
799        // If it's not too expensive, i.e. cost != -1, issue copies.
800        SUnit *TrySU = NotReady[0];
801        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
802        assert(LRegs.size() == 1 && "Can't handle this yet!");
803        unsigned Reg = LRegs[0];
804        SUnit *LRDef = LiveRegDefs[Reg];
805        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
806        const TargetRegisterClass *RC =
807          TRI->getMinimalPhysRegClass(Reg, VT);
808        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
809
810        // If the cross copy register class is null, then it must be possible to
811        // copy the value directly. Do not try to duplicate the def.
812        SUnit *NewDef = 0;
813        if (DestRC)
814          NewDef = CopyAndMoveSuccessors(LRDef);
815        else
816          DestRC = RC;
817        if (!NewDef) {
818          // Issue copies, these can be expensive cross register class copies.
819          SmallVector<SUnit*, 2> Copies;
820          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
821          DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
822                       << " to SU #" << Copies.front()->NodeNum << "\n");
823          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
824                              /*Reg=*/0, /*isNormalMemory=*/false,
825                              /*isMustAlias=*/false,
826                              /*isArtificial=*/true));
827          NewDef = Copies.back();
828        }
829
830        DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
831                     << " to SU #" << TrySU->NodeNum << "\n");
832        LiveRegDefs[Reg] = NewDef;
833        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
834                             /*Reg=*/0, /*isNormalMemory=*/false,
835                             /*isMustAlias=*/false,
836                             /*isArtificial=*/true));
837        TrySU->isAvailable = false;
838        CurSU = NewDef;
839      }
840
841      assert(CurSU && "Unable to resolve live physical register dependencies!");
842    }
843
844    // Add the nodes that aren't ready back onto the available list.
845    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
846      NotReady[i]->isPending = false;
847      // May no longer be available due to backtracking.
848      if (NotReady[i]->isAvailable)
849        AvailableQueue->push(NotReady[i]);
850    }
851    NotReady.clear();
852
853    if (CurSU)
854      ScheduleNodeBottomUp(CurSU, CurCycle);
855    ++CurCycle;
856    AvailableQueue->setCurCycle(CurCycle);
857  }
858
859  // Reverse the order since the schedule was built bottom up.
860  std::reverse(Sequence.begin(), Sequence.end());
861
862#ifndef NDEBUG
863  VerifySchedule(isBottomUp);
864#endif
865}
866
867//===----------------------------------------------------------------------===//
868//  Top-Down Scheduling
869//===----------------------------------------------------------------------===//
870
871/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
872/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
873void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
874  SUnit *SuccSU = SuccEdge->getSUnit();
875
876#ifndef NDEBUG
877  if (SuccSU->NumPredsLeft == 0) {
878    dbgs() << "*** Scheduling failed! ***\n";
879    SuccSU->dump(this);
880    dbgs() << " has been released too many times!\n";
881    llvm_unreachable(0);
882  }
883#endif
884  --SuccSU->NumPredsLeft;
885
886  // If all the node's predecessors are scheduled, this node is ready
887  // to be scheduled. Ignore the special ExitSU node.
888  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
889    SuccSU->isAvailable = true;
890    AvailableQueue->push(SuccSU);
891  }
892}
893
894void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
895  // Top down: release successors
896  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
897       I != E; ++I) {
898    assert(!I->isAssignedRegDep() &&
899           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
900
901    ReleaseSucc(SU, &*I);
902  }
903}
904
905/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
906/// count of its successors. If a successor pending count is zero, add it to
907/// the Available queue.
908void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
909  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
910  DEBUG(SU->dump(this));
911
912  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
913  SU->setDepthToAtLeast(CurCycle);
914  Sequence.push_back(SU);
915
916  ReleaseSuccessors(SU);
917  SU->isScheduled = true;
918  AvailableQueue->ScheduledNode(SU);
919}
920
921/// ListScheduleTopDown - The main loop of list scheduling for top-down
922/// schedulers.
923void ScheduleDAGRRList::ListScheduleTopDown() {
924  unsigned CurCycle = 0;
925  AvailableQueue->setCurCycle(CurCycle);
926
927  // Release any successors of the special Entry node.
928  ReleaseSuccessors(&EntrySU);
929
930  // Add all leaves to the Available queue.
931  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
932    // It is available if it has no predecessors.
933    if (SUnits[i].Preds.empty()) {
934      AvailableQueue->push(&SUnits[i]);
935      SUnits[i].isAvailable = true;
936    }
937  }
938
939  // While Available queue is not empty, grab the node with the highest
940  // priority. If it is not ready, put it back. Schedule the node.
941  Sequence.reserve(SUnits.size());
942  while (!AvailableQueue->empty()) {
943    SUnit *CurSU = AvailableQueue->pop();
944
945    if (CurSU)
946      ScheduleNodeTopDown(CurSU, CurCycle);
947    ++CurCycle;
948    AvailableQueue->setCurCycle(CurCycle);
949  }
950
951#ifndef NDEBUG
952  VerifySchedule(isBottomUp);
953#endif
954}
955
956
957//===----------------------------------------------------------------------===//
958//                RegReductionPriorityQueue Implementation
959//===----------------------------------------------------------------------===//
960//
961// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
962// to reduce register pressure.
963//
964namespace {
965  template<class SF>
966  class RegReductionPriorityQueue;
967
968  /// bu_ls_rr_sort - Priority function for bottom up register pressure
969  /// reduction scheduler.
970  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
971    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
972    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
973    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
974
975    bool operator()(const SUnit* left, const SUnit* right) const;
976  };
977
978  // td_ls_rr_sort - Priority function for top down register pressure reduction
979  // scheduler.
980  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
981    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
982    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
983    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
984
985    bool operator()(const SUnit* left, const SUnit* right) const;
986  };
987
988  // src_ls_rr_sort - Priority function for source order scheduler.
989  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
990    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
991    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
992      : SPQ(spq) {}
993    src_ls_rr_sort(const src_ls_rr_sort &RHS)
994      : SPQ(RHS.SPQ) {}
995
996    bool operator()(const SUnit* left, const SUnit* right) const;
997  };
998
999  // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1000  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1001    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
1002    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
1003      : SPQ(spq) {}
1004    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1005      : SPQ(RHS.SPQ) {}
1006
1007    bool operator()(const SUnit* left, const SUnit* right) const;
1008  };
1009
1010  // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1011  // scheduler.
1012  struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1013    RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
1014    ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
1015      : SPQ(spq) {}
1016    ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1017      : SPQ(RHS.SPQ) {}
1018
1019    bool operator()(const SUnit* left, const SUnit* right) const;
1020  };
1021}  // end anonymous namespace
1022
1023/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1024/// Smaller number is the higher priority.
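/// For example, a leaf with no non-chain predecessors gets number 1.  A node
/// whose non-chain predecessors have numbers {2, 2, 1} gets max(2,2,1) plus
/// one "Extra" for the repeated maximum, i.e. 3.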
1025static unsigned
1026CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1027  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1028  if (SethiUllmanNumber != 0)
1029    return SethiUllmanNumber;
1030
1031  unsigned Extra = 0;
1032  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1033       I != E; ++I) {
1034    if (I->isCtrl()) continue;  // ignore chain preds
1035    SUnit *PredSU = I->getSUnit();
1036    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1037    if (PredSethiUllman > SethiUllmanNumber) {
1038      SethiUllmanNumber = PredSethiUllman;
1039      Extra = 0;
1040    } else if (PredSethiUllman == SethiUllmanNumber)
1041      ++Extra;
1042  }
1043
1044  SethiUllmanNumber += Extra;
1045
1046  if (SethiUllmanNumber == 0)
1047    SethiUllmanNumber = 1;
1048
1049  return SethiUllmanNumber;
1050}
1051
1052namespace {
1053  template<class SF>
1054  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
1055    std::vector<SUnit*> Queue;
1056    SF Picker;
1057    unsigned CurQueueId;
1058    bool TracksRegPressure;
1059
1060  protected:
1061    // SUnits - The SUnits for the current graph.
1062    std::vector<SUnit> *SUnits;
1063
1064    MachineFunction &MF;
1065    const TargetInstrInfo *TII;
1066    const TargetRegisterInfo *TRI;
1067    const TargetLowering *TLI;
1068    ScheduleDAGRRList *scheduleDAG;
1069
1070    // SethiUllmanNumbers - The SethiUllman number for each node.
1071    std::vector<unsigned> SethiUllmanNumbers;
1072
1073    /// RegPressure - Tracking current reg pressure per register class.
1074    ///
1075    std::vector<unsigned> RegPressure;
1076
1077    /// RegLimit - Tracking the number of allocatable registers per register
1078    /// class.
1079    std::vector<unsigned> RegLimit;
1080
1081  public:
1082    RegReductionPriorityQueue(MachineFunction &mf,
1083                              bool tracksrp,
1084                              const TargetInstrInfo *tii,
1085                              const TargetRegisterInfo *tri,
1086                              const TargetLowering *tli)
1087      : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
1088        MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1089      if (TracksRegPressure) {
1090        unsigned NumRC = TRI->getNumRegClasses();
1091        RegLimit.resize(NumRC);
1092        RegPressure.resize(NumRC);
1093        std::fill(RegLimit.begin(), RegLimit.end(), 0);
1094        std::fill(RegPressure.begin(), RegPressure.end(), 0);
1095        for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1096               E = TRI->regclass_end(); I != E; ++I)
1097          RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
1098      }
1099    }
1100
1101    void initNodes(std::vector<SUnit> &sunits) {
1102      SUnits = &sunits;
1103      // Add pseudo dependency edges for two-address nodes.
1104      AddPseudoTwoAddrDeps();
1105      // Reroute edges to nodes with multiple uses.
1106      PrescheduleNodesWithMultipleUses();
1107      // Calculate node priorities.
1108      CalculateSethiUllmanNumbers();
1109    }
1110
1111    void addNode(const SUnit *SU) {
1112      unsigned SUSize = SethiUllmanNumbers.size();
1113      if (SUnits->size() > SUSize)
1114        SethiUllmanNumbers.resize(SUSize*2, 0);
1115      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1116    }
1117
1118    void updateNode(const SUnit *SU) {
1119      SethiUllmanNumbers[SU->NodeNum] = 0;
1120      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1121    }
1122
1123    void releaseState() {
1124      SUnits = 0;
1125      SethiUllmanNumbers.clear();
1126      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1127    }
1128
1129    unsigned getNodePriority(const SUnit *SU) const {
1130      assert(SU->NodeNum < SethiUllmanNumbers.size());
1131      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1132      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1133        // CopyToReg should be close to its uses to facilitate coalescing and
1134        // avoid spilling.
1135        return 0;
1136      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1137          Opc == TargetOpcode::SUBREG_TO_REG ||
1138          Opc == TargetOpcode::INSERT_SUBREG)
1139        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1140        // close to their uses to facilitate coalescing.
1141        return 0;
1142      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1143        // If SU does not have a register use, i.e. it doesn't produce a value
1144        // that would be consumed (e.g. store), then it terminates a chain of
1145        // computation.  Give it a large SethiUllman number so it will be
1146        // scheduled right before its predecessors so that it doesn't lengthen
1147        // their live ranges.
1148        return 0xffff;
1149      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1150        // If SU does not have a register def, schedule it close to its uses
1151        // because it does not lengthen any live ranges.
1152        return 0;
1153      return SethiUllmanNumbers[SU->NodeNum];
1154    }
1155
1156    unsigned getNodeOrdering(const SUnit *SU) const {
1157      return scheduleDAG->DAG->GetOrdering(SU->getNode());
1158    }
1159
1160    bool empty() const { return Queue.empty(); }
1161
1162    void push(SUnit *U) {
1163      assert(!U->NodeQueueId && "Node in the queue already");
1164      U->NodeQueueId = ++CurQueueId;
1165      Queue.push_back(U);
1166    }
1167
1168    SUnit *pop() {
1169      if (empty()) return NULL;
1170      std::vector<SUnit *>::iterator Best = Queue.begin();
1171      for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
1172           E = Queue.end(); I != E; ++I)
1173        if (Picker(*Best, *I))
1174          Best = I;
1175      SUnit *V = *Best;
1176      if (Best != prior(Queue.end()))
1177        std::swap(*Best, Queue.back());
1178      Queue.pop_back();
1179      V->NodeQueueId = 0;
1180      return V;
1181    }
1182
1183    void remove(SUnit *SU) {
1184      assert(!Queue.empty() && "Queue is empty!");
1185      assert(SU->NodeQueueId != 0 && "Not in queue!");
1186      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1187                                                   SU);
1188      if (I != prior(Queue.end()))
1189        std::swap(*I, Queue.back());
1190      Queue.pop_back();
1191      SU->NodeQueueId = 0;
1192    }
1193
1194    bool HighRegPressure(const SUnit *SU, unsigned &Excess) const {
1195      if (!TLI)
1196        return false;
1197
1198      bool High = false;
1199      Excess = 0;
1200      for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1201           I != E; ++I) {
1202        if (I->isCtrl())
1203          continue;
1204        SUnit *PredSU = I->getSUnit();
1205        const SDNode *PN = PredSU->getNode();
1206        if (!PN->isMachineOpcode()) {
1207          if (PN->getOpcode() == ISD::CopyFromReg) {
1208            EVT VT = PN->getValueType(0);
1209            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1210            unsigned Cost = TLI->getRepRegClassCostFor(VT);
1211            if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
1212              High = true;
1213              Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
1214            }
1215          }
1216          continue;
1217        }
1218        unsigned POpc = PN->getMachineOpcode();
1219        if (POpc == TargetOpcode::IMPLICIT_DEF)
1220          continue;
1221        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1222          EVT VT = PN->getOperand(0).getValueType();
1223          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1224          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1225          // Check if this increases register pressure of the specific register
1226          // class to the point where it would cause spills.
1227          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
1228            High = true;
1229            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
1230          }
1231          continue;
1232        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1233                   POpc == TargetOpcode::SUBREG_TO_REG) {
1234          EVT VT = PN->getValueType(0);
1235          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1236          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1237          // Check if this increases register pressure of the specific register
1238          // class to the point where it would cause spills.
1239          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
1240            High = true;
1241            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
1242          }
1243          continue;
1244        }
1245        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1246        for (unsigned i = 0; i != NumDefs; ++i) {
1247          EVT VT = PN->getValueType(i);
1248          if (!PN->hasAnyUseOfValue(i))
1249            continue;
1250          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1251          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1252          // Check if this increases register pressure of the specific register
1253          // class to the point where it would cause spills.
1254          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
1255            High = true;
1256            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
1257          }
1258        }
1259      }
1260
1261      return High;
1262    }
1263
1264    void ScheduledNode(SUnit *SU) {
1265      if (!TracksRegPressure)
1266        return;
1267
1268      const SDNode *N = SU->getNode();
1269      if (!N->isMachineOpcode()) {
1270        if (N->getOpcode() != ISD::CopyToReg)
1271          return;
1272      } else {
1273        unsigned Opc = N->getMachineOpcode();
1274        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1275            Opc == TargetOpcode::INSERT_SUBREG ||
1276            Opc == TargetOpcode::SUBREG_TO_REG ||
1277            Opc == TargetOpcode::REG_SEQUENCE ||
1278            Opc == TargetOpcode::IMPLICIT_DEF)
1279          return;
1280      }
1281
1282      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1283           I != E; ++I) {
1284        if (I->isCtrl())
1285          continue;
1286        SUnit *PredSU = I->getSUnit();
1287        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1288          continue;
1289        const SDNode *PN = PredSU->getNode();
1290        if (!PN->isMachineOpcode()) {
1291          if (PN->getOpcode() == ISD::CopyFromReg) {
1292            EVT VT = PN->getValueType(0);
1293            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1294            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1295          }
1296          continue;
1297        }
1298        unsigned POpc = PN->getMachineOpcode();
1299        if (POpc == TargetOpcode::IMPLICIT_DEF)
1300          continue;
1301        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1302          EVT VT = PN->getOperand(0).getValueType();
1303          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1304          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1305          continue;
1306        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1307                   POpc == TargetOpcode::SUBREG_TO_REG) {
1308          EVT VT = PN->getValueType(0);
1309          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1310          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1311          continue;
1312        }
1313        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1314        for (unsigned i = 0; i != NumDefs; ++i) {
1315          EVT VT = PN->getValueType(i);
1316          if (!PN->hasAnyUseOfValue(i))
1317            continue;
1318          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1319          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1320        }
1321      }
1322
1323      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1324      // may transfer data dependencies to CopyToReg.
1325      if (SU->NumSuccs && N->isMachineOpcode()) {
1326        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1327        for (unsigned i = 0; i != NumDefs; ++i) {
1328          EVT VT = N->getValueType(i);
1329          if (!N->hasAnyUseOfValue(i))
1330            continue;
1331          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1332          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1333            // Register pressure tracking is imprecise. This can happen.
1334            RegPressure[RCId] = 0;
1335          else
1336            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1337        }
1338      }
1339
1340      dumpRegPressure();
1341    }
1342
1343    void UnscheduledNode(SUnit *SU) {
1344      if (!TracksRegPressure)
1345        return;
1346
1347      const SDNode *N = SU->getNode();
1348      if (!N->isMachineOpcode()) {
1349        if (N->getOpcode() != ISD::CopyToReg)
1350          return;
1351      } else {
1352        unsigned Opc = N->getMachineOpcode();
1353        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1354            Opc == TargetOpcode::INSERT_SUBREG ||
1355            Opc == TargetOpcode::SUBREG_TO_REG ||
1356            Opc == TargetOpcode::REG_SEQUENCE ||
1357            Opc == TargetOpcode::IMPLICIT_DEF)
1358          return;
1359      }
1360
1361      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1362           I != E; ++I) {
1363        if (I->isCtrl())
1364          continue;
1365        SUnit *PredSU = I->getSUnit();
1366        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1367          continue;
1368        const SDNode *PN = PredSU->getNode();
1369        if (!PN->isMachineOpcode()) {
1370          if (PN->getOpcode() == ISD::CopyFromReg) {
1371            EVT VT = PN->getValueType(0);
1372            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1373            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1374          }
1375          continue;
1376        }
1377        unsigned POpc = PN->getMachineOpcode();
1378        if (POpc == TargetOpcode::IMPLICIT_DEF)
1379          continue;
1380        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1381          EVT VT = PN->getOperand(0).getValueType();
1382          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1383          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1384          continue;
1385        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1386                   POpc == TargetOpcode::SUBREG_TO_REG) {
1387          EVT VT = PN->getValueType(0);
1388          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1389          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1390          continue;
1391        }
1392        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1393        for (unsigned i = 0; i != NumDefs; ++i) {
1394          EVT VT = PN->getValueType(i);
1395          if (!PN->hasAnyUseOfValue(i))
1396            continue;
1397          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1398          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1399            // Register pressure tracking is imprecise. This can happen.
1400            RegPressure[RCId] = 0;
1401          else
1402            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1403        }
1404      }
1405
1406      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1407      // may transfer data dependencies to CopyToReg.
1408      if (SU->NumSuccs && N->isMachineOpcode()) {
1409        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1410        for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1411          EVT VT = N->getValueType(i);
1412          if (VT == MVT::Flag || VT == MVT::Other)
1413            continue;
1414          if (!N->hasAnyUseOfValue(i))
1415            continue;
1416          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1417          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1418        }
1419      }
1420
1421      dumpRegPressure();
1422    }
1423
1424    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1425      scheduleDAG = scheduleDag;
1426    }
1427
1428    void dumpRegPressure() const {
1429      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1430             E = TRI->regclass_end(); I != E; ++I) {
1431        const TargetRegisterClass *RC = *I;
1432        unsigned Id = RC->getID();
1433        unsigned RP = RegPressure[Id];
1434        if (!RP) continue;
1435        DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1436              << '\n');
1437      }
1438    }
1439
1440  protected:
1441    bool canClobber(const SUnit *SU, const SUnit *Op);
1442    void AddPseudoTwoAddrDeps();
1443    void PrescheduleNodesWithMultipleUses();
1444    void CalculateSethiUllmanNumbers();
1445  };
1446
1447  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1448    BURegReductionPriorityQueue;
1449
1450  typedef RegReductionPriorityQueue<td_ls_rr_sort>
1451    TDRegReductionPriorityQueue;
1452
1453  typedef RegReductionPriorityQueue<src_ls_rr_sort>
1454    SrcRegReductionPriorityQueue;
1455
1456  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1457    HybridBURRPriorityQueue;
1458
1459  typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1460    ILPBURRPriorityQueue;
1461}
1462
1463/// closestSucc - Returns the scheduled cycle of the successor which is
1464/// closest to the current cycle.
1465static unsigned closestSucc(const SUnit *SU) {
1466  unsigned MaxHeight = 0;
1467  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1468       I != E; ++I) {
1469    if (I->isCtrl()) continue;  // ignore chain succs
1470    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
1472    // to be at the same position.
1473    if (I->getSUnit()->getNode() &&
1474        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1475      Height = closestSucc(I->getSUnit())+1;
1476    if (Height > MaxHeight)
1477      MaxHeight = Height;
1478  }
1479  return MaxHeight;
1480}
1481
/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
1484static unsigned calcMaxScratches(const SUnit *SU) {
1485  unsigned Scratches = 0;
1486  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1487       I != E; ++I) {
1488    if (I->isCtrl()) continue;  // ignore chain preds
1489    Scratches++;
1490  }
1491  return Scratches;
1492}
1493
1494template <typename RRSort>
1495static bool BURRSort(const SUnit *left, const SUnit *right,
1496                     const RegReductionPriorityQueue<RRSort> *SPQ) {
1497  unsigned LPriority = SPQ->getNodePriority(left);
1498  unsigned RPriority = SPQ->getNodePriority(right);
1499  if (LPriority != RPriority)
1500    return LPriority > RPriority;
1501
  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
1503  // e.g.
1504  // t1 = op t2, c1
1505  // t3 = op t4, c2
1506  //
1507  // and the following instructions are both ready.
1508  // t2 = op c3
1509  // t4 = op c4
1510  //
1511  // Then schedule t2 = op first.
1512  // i.e.
1513  // t4 = op c4
1514  // t2 = op c3
1515  // t1 = op t2, c1
1516  // t3 = op t4, c2
1517  //
1518  // This creates more short live intervals.
1519  unsigned LDist = closestSucc(left);
1520  unsigned RDist = closestSucc(right);
1521  if (LDist != RDist)
1522    return LDist < RDist;
1523
  // How many registers become live when the node is scheduled.
1525  unsigned LScratch = calcMaxScratches(left);
1526  unsigned RScratch = calcMaxScratches(right);
1527  if (LScratch != RScratch)
1528    return LScratch > RScratch;
1529
1530  if (left->getHeight() != right->getHeight())
1531    return left->getHeight() > right->getHeight();
1532
1533  if (left->getDepth() != right->getDepth())
1534    return left->getDepth() < right->getDepth();
1535
1536  assert(left->NodeQueueId && right->NodeQueueId &&
1537         "NodeQueueId cannot be zero");
1538  return (left->NodeQueueId > right->NodeQueueId);
1539}
1540
1541// Bottom up
1542bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1543  return BURRSort(left, right, SPQ);
1544}
1545
1546// Source order, otherwise bottom up.
1547bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1548  unsigned LOrder = SPQ->getNodeOrdering(left);
1549  unsigned ROrder = SPQ->getNodeOrdering(right);
1550
  // Prefer the node with the lower non-zero order number; an order number of
  // zero means the node has no source order information.
1553  if ((LOrder || ROrder) && LOrder != ROrder)
1554    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
1555
1556  return BURRSort(left, right, SPQ);
1557}
1558
1559bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
1560  unsigned LExcess, RExcess;
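  // HighRegPressure reports whether scheduling the node would leave register
  // pressure above the limit; when both nodes are in that state, the excess
  // amounts it reports are used below to break the tie.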
1561  bool LHigh = SPQ->HighRegPressure(left, LExcess);
1562  bool RHigh = SPQ->HighRegPressure(right, RExcess);
1563  // Avoid causing spills. If register pressure is high, schedule for
1564  // register pressure reduction.
1565  if (LHigh && !RHigh)
1566    return true;
1567  else if (!LHigh && RHigh)
1568    return false;
1569  else if (LHigh && RHigh) {
1570    if (LExcess > RExcess)
1571      return true;
1572    else if (LExcess < RExcess)
1573      return false;
1574  } else {
1575    // Low register pressure situation, schedule for latency if possible.
1576    bool LStall = left->SchedulingPref == Sched::Latency &&
1577      SPQ->getCurCycle() < left->getHeight();
1578    bool RStall = right->SchedulingPref == Sched::Latency &&
1579      SPQ->getCurCycle() < right->getHeight();
    // If scheduling one of the nodes will cause a pipeline stall, delay it.
    // If scheduling both of them would cause stalls, sort them according to
    // their height.
1583    // If neither will cause a pipeline stall, try to reduce register pressure.
1584    if (LStall) {
1585      if (!RStall)
1586        return true;
1587      if (left->getHeight() != right->getHeight())
1588        return left->getHeight() > right->getHeight();
1589    } else if (RStall)
1590      return false;
1591
    // If either node is scheduling for latency, sort them by height first and
    // then by latency.
1594    if (left->SchedulingPref == Sched::Latency ||
1595        right->SchedulingPref == Sched::Latency) {
1596      if (left->getHeight() != right->getHeight())
1597        return left->getHeight() > right->getHeight();
1598      if (left->Latency != right->Latency)
1599        return left->Latency > right->Latency;
1600    }
1601  }
1602
1603  return BURRSort(left, right, SPQ);
1604}
1605
1606bool ilp_ls_rr_sort::operator()(const SUnit *left,
1607                                const SUnit *right) const {
1608  unsigned LExcess, RExcess;
1609  bool LHigh = SPQ->HighRegPressure(left, LExcess);
1610  bool RHigh = SPQ->HighRegPressure(right, RExcess);
1611  // Avoid causing spills. If register pressure is high, schedule for
1612  // register pressure reduction.
1613  if (LHigh && !RHigh)
1614    return true;
1615  else if (!LHigh && RHigh)
1616    return false;
1617  else if (LHigh && RHigh) {
1618    if (LExcess > RExcess)
1619      return true;
1620    else if (LExcess < RExcess)
1621      return false;
1622  } else {
1623    // Low register pressure situation, schedule to maximize instruction level
1624    // parallelism.
1625    if (left->NumPreds > right->NumPreds)
1626      return false;
1627    else if (left->NumPreds < right->NumPreds)
      return true;
1629  }
1630
1631  return BURRSort(left, right, SPQ);
1632}
1633
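/// canClobber - Return true if SU is a two-address instruction and one of its
/// tied (def&use) operands is produced by Op, in which case scheduling SU
/// will overwrite the register that holds Op's result.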
1634template<class SF>
1635bool
1636RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1637  if (SU->isTwoAddress) {
1638    unsigned Opc = SU->getNode()->getMachineOpcode();
1639    const TargetInstrDesc &TID = TII->get(Opc);
1640    unsigned NumRes = TID.getNumDefs();
1641    unsigned NumOps = TID.getNumOperands() - NumRes;
1642    for (unsigned i = 0; i != NumOps; ++i) {
1643      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1644        SDNode *DU = SU->getNode()->getOperand(i).getNode();
1645        if (DU->getNodeId() != -1 &&
1646            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1647          return true;
1648      }
1649    }
1650  }
1651  return false;
1652}
1653
1654/// hasCopyToRegUse - Return true if SU has a value successor that is a
1655/// CopyToReg node.
1656static bool hasCopyToRegUse(const SUnit *SU) {
1657  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1658       I != E; ++I) {
1659    if (I->isCtrl()) continue;
1660    const SUnit *SuccSU = I->getSUnit();
1661    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
1662      return true;
1663  }
1664  return false;
1665}
1666
1667/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1668/// physical register defs.
1669static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1670                                  const TargetInstrInfo *TII,
1671                                  const TargetRegisterInfo *TRI) {
1672  SDNode *N = SuccSU->getNode();
1673  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1674  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1675  assert(ImpDefs && "Caller should check hasPhysRegDefs");
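  // Walk all the SDNodes flagged (glued) together into this scheduling unit.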
1676  for (const SDNode *SUNode = SU->getNode(); SUNode;
1677       SUNode = SUNode->getFlaggedNode()) {
1678    if (!SUNode->isMachineOpcode())
1679      continue;
1680    const unsigned *SUImpDefs =
1681      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
1682    if (!SUImpDefs)
1683      return false;
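    // Values of N beyond its explicit defs correspond to its implicit
    // physical register defs; check each of them against SUNode's implicit
    // defs for an overlap.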
1684    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1685      EVT VT = N->getValueType(i);
1686      if (VT == MVT::Flag || VT == MVT::Other)
1687        continue;
1688      if (!N->hasAnyUseOfValue(i))
1689        continue;
1690      unsigned Reg = ImpDefs[i - NumDefs];
1691      for (;*SUImpDefs; ++SUImpDefs) {
1692        unsigned SUReg = *SUImpDefs;
1693        if (TRI->regsOverlap(Reg, SUReg))
1694          return true;
1695      }
1696    }
1697  }
1698  return false;
1699}
1700
1701/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1702/// are not handled well by the general register pressure reduction
1703/// heuristics. When presented with code like this:
1704///
1705///      N
1706///    / |
1707///   /  |
1708///  U  store
1709///  |
1710/// ...
1711///
1712/// the heuristics tend to push the store up, but since the
1713/// operand of the store has another use (U), this would increase
1714/// the length of that other use (the U->N edge).
1715///
1716/// This function transforms code like the above to route U's
1717/// dependence through the store when possible, like this:
1718///
1719///      N
1720///      ||
1721///      ||
1722///     store
1723///       |
1724///       U
1725///       |
1726///      ...
1727///
1728/// This results in the store being scheduled immediately
1729/// after N, which shortens the U->N live range, reducing
1730/// register pressure.
1731///
1732template<class SF>
1733void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1734  // Visit all the nodes in topological order, working top-down.
1735  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1736    SUnit *SU = &(*SUnits)[i];
1737    // For now, only look at nodes with no data successors, such as stores.
1738    // These are especially important, due to the heuristics in
1739    // getNodePriority for nodes with no data successors.
1740    if (SU->NumSuccs != 0)
1741      continue;
1742    // For now, only look at nodes with exactly one data predecessor.
1743    if (SU->NumPreds != 1)
1744      continue;
1745    // Avoid prescheduling copies to virtual registers, which don't behave
1746    // like other nodes from the perspective of scheduling heuristics.
1747    if (SDNode *N = SU->getNode())
1748      if (N->getOpcode() == ISD::CopyToReg &&
1749          TargetRegisterInfo::isVirtualRegister
1750            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1751        continue;
1752
1753    // Locate the single data predecessor.
1754    SUnit *PredSU = 0;
1755    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1756         EE = SU->Preds.end(); II != EE; ++II)
1757      if (!II->isCtrl()) {
1758        PredSU = II->getSUnit();
1759        break;
1760      }
1761    assert(PredSU);
1762
1763    // Don't rewrite edges that carry physregs, because that requires additional
1764    // support infrastructure.
1765    if (PredSU->hasPhysRegDefs)
1766      continue;
1767    // Short-circuit the case where SU is PredSU's only data successor.
1768    if (PredSU->NumSuccs == 1)
1769      continue;
    // Avoid prescheduling copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
1772    if (SDNode *N = SU->getNode())
1773      if (N->getOpcode() == ISD::CopyFromReg &&
1774          TargetRegisterInfo::isVirtualRegister
1775            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1776        continue;
1777
1778    // Perform checks on the successors of PredSU.
1779    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1780         EE = PredSU->Succs.end(); II != EE; ++II) {
1781      SUnit *PredSuccSU = II->getSUnit();
1782      if (PredSuccSU == SU) continue;
1783      // If PredSU has another successor with no data successors, for
1784      // now don't attempt to choose either over the other.
1785      if (PredSuccSU->NumSuccs == 0)
1786        goto outer_loop_continue;
1787      // Don't break physical register dependencies.
1788      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1789        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1790          goto outer_loop_continue;
1791      // Don't introduce graph cycles.
1792      if (scheduleDAG->IsReachable(SU, PredSuccSU))
1793        goto outer_loop_continue;
1794    }
1795
1796    // Ok, the transformation is safe and the heuristics suggest it is
1797    // profitable. Update the graph.
1798    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
1799                 << " next to PredSU #" << PredSU->NodeNum
1800                 << " to guide scheduling in the presence of multiple uses\n");
1801    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1802      SDep Edge = PredSU->Succs[i];
1803      assert(!Edge.isAssignedRegDep());
1804      SUnit *SuccSU = Edge.getSUnit();
1805      if (SuccSU != SU) {
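        // Re-route the dependence: SuccSU now depends on SU instead of
        // PredSU, while SU inherits the original edge from PredSU.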
1806        Edge.setSUnit(PredSU);
1807        scheduleDAG->RemovePred(SuccSU, Edge);
1808        scheduleDAG->AddPred(SU, Edge);
1809        Edge.setSUnit(SU);
1810        scheduleDAG->AddPred(SuccSU, Edge);
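        // RemovePred erased an entry from PredSU->Succs, so step the index
        // back to avoid skipping the edge that shifted into this slot.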
1811        --i;
1812      }
1813    }
1814  outer_loop_continue:;
1815  }
1816}
1817
1818/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
1820/// node (if it won't create a cycle) so the two-address one will be scheduled
1821/// first (lower in the schedule). If both nodes are two-address, favor the
1822/// one that has a CopyToReg use (more likely to be a loop induction update).
1823/// If both are two-address, but one is commutable while the other is not
1824/// commutable, favor the one that's not commutable.
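///
/// For example (a hypothetical pair), suppose t2 is used both by
///   t2 = add t2, c1   (t2 is a tied def&use operand)
///   t3 = mul t2, c2
/// The artificial edge makes the add follow the mul in the final schedule, so
/// the mul reads t2 before the add clobbers it and no copy of t2 is needed.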
1825template<class SF>
1826void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1827  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1828    SUnit *SU = &(*SUnits)[i];
1829    if (!SU->isTwoAddress)
1830      continue;
1831
1832    SDNode *Node = SU->getNode();
1833    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
1834      continue;
1835
1836    unsigned Opc = Node->getMachineOpcode();
1837    const TargetInstrDesc &TID = TII->get(Opc);
1838    unsigned NumRes = TID.getNumDefs();
1839    unsigned NumOps = TID.getNumOperands() - NumRes;
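    // Look for operands that are tied to a def, i.e. the def&use operands of
    // this two-address instruction.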
1840    for (unsigned j = 0; j != NumOps; ++j) {
1841      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1842        continue;
1843      SDNode *DU = SU->getNode()->getOperand(j).getNode();
1844      if (DU->getNodeId() == -1)
1845        continue;
1846      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1847      if (!DUSU) continue;
1848      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1849           E = DUSU->Succs.end(); I != E; ++I) {
1850        if (I->isCtrl()) continue;
1851        SUnit *SuccSU = I->getSUnit();
1852        if (SuccSU == SU)
1853          continue;
        // Be conservative. Ignore if the nodes aren't at roughly the same
        // height.
1856        if (SuccSU->getHeight() < SU->getHeight() &&
1857            (SU->getHeight() - SuccSU->getHeight()) > 1)
1858          continue;
1859        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1860        // constrains whatever is using the copy, instead of the copy
1861        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristic.
1863        while (SuccSU->Succs.size() == 1 &&
1864               SuccSU->getNode()->isMachineOpcode() &&
1865               SuccSU->getNode()->getMachineOpcode() ==
1866                 TargetOpcode::COPY_TO_REGCLASS)
1867          SuccSU = SuccSU->Succs.front().getSUnit();
1868        // Don't constrain non-instruction nodes.
1869        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1870          continue;
1871        // Don't constrain nodes with physical register defs if the
1872        // predecessor can clobber them.
1873        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1874          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1875            continue;
1876        }
1877        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1878        // these may be coalesced away. We want them close to their uses.
1879        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1880        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
1881            SuccOpc == TargetOpcode::INSERT_SUBREG ||
1882            SuccOpc == TargetOpcode::SUBREG_TO_REG)
1883          continue;
1884        if ((!canClobber(SuccSU, DUSU) ||
1885             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
1886             (!SU->isCommutable && SuccSU->isCommutable)) &&
1887            !scheduleDAG->IsReachable(SuccSU, SU)) {
1888          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
1889                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
1890          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1891                                        /*Reg=*/0, /*isNormalMemory=*/false,
1892                                        /*isMustAlias=*/false,
1893                                        /*isArtificial=*/true));
1894        }
1895      }
1896    }
1897  }
1898}
1899
1900/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1901/// scheduling units.
1902template<class SF>
1903void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1904  SethiUllmanNumbers.assign(SUnits->size(), 0);
1905
1906  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1907    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1908}
1909
/// LimitedSumOfUnscheduledPredsOfSuccs - Count the unscheduled predecessors
/// of the successors of the SUnit SU, stopping once the count exceeds the
/// provided limit.
1913static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1914                                                    unsigned Limit) {
1915  unsigned Sum = 0;
1916  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1917       I != E; ++I) {
1918    const SUnit *SuccSU = I->getSUnit();
1919    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1920         EE = SuccSU->Preds.end(); II != EE; ++II) {
1921      SUnit *PredSU = II->getSUnit();
1922      if (!PredSU->isScheduled)
1923        if (++Sum > Limit)
1924          return Sum;
1925    }
1926  }
1927  return Sum;
1928}
1929
1930
1931// Top down
1932bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1933  unsigned LPriority = SPQ->getNodePriority(left);
1934  unsigned RPriority = SPQ->getNodePriority(right);
1935  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1936  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
1937  bool LIsFloater = LIsTarget && left->NumPreds == 0;
1938  bool RIsFloater = RIsTarget && right->NumPreds == 0;
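  // Give a node a small bonus if it is the only remaining unscheduled
  // predecessor of its successors: scheduling it makes those successors ready.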
1939  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1940  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1941
1942  if (left->NumSuccs == 0 && right->NumSuccs != 0)
1943    return false;
1944  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1945    return true;
1946
1947  if (LIsFloater)
1948    LBonus -= 2;
1949  if (RIsFloater)
1950    RBonus -= 2;
1951  if (left->NumSuccs == 1)
1952    LBonus += 2;
1953  if (right->NumSuccs == 1)
1954    RBonus += 2;
1955
1956  if (LPriority+LBonus != RPriority+RBonus)
1957    return LPriority+LBonus < RPriority+RBonus;
1958
1959  if (left->getDepth() != right->getDepth())
1960    return left->getDepth() < right->getDepth();
1961
1962  if (left->NumSuccsLeft != right->NumSuccsLeft)
1963    return left->NumSuccsLeft > right->NumSuccsLeft;
1964
1965  assert(left->NodeQueueId && right->NodeQueueId &&
1966         "NodeQueueId cannot be zero");
1967  return (left->NodeQueueId > right->NodeQueueId);
1968}
1969
1970//===----------------------------------------------------------------------===//
1971//                         Public Constructor Functions
1972//===----------------------------------------------------------------------===//
1973
1974llvm::ScheduleDAGSDNodes *
1975llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1976  const TargetMachine &TM = IS->TM;
1977  const TargetInstrInfo *TII = TM.getInstrInfo();
1978  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1979
1980  BURegReductionPriorityQueue *PQ =
1981    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
1982  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
1983  PQ->setScheduleDAG(SD);
1984  return SD;
1985}
1986
1987llvm::ScheduleDAGSDNodes *
1988llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1989  const TargetMachine &TM = IS->TM;
1990  const TargetInstrInfo *TII = TM.getInstrInfo();
1991  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1992
1993  TDRegReductionPriorityQueue *PQ =
1994    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
1995  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
1996  PQ->setScheduleDAG(SD);
1997  return SD;
1998}
1999
2000llvm::ScheduleDAGSDNodes *
2001llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2002  const TargetMachine &TM = IS->TM;
2003  const TargetInstrInfo *TII = TM.getInstrInfo();
2004  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2005
2006  SrcRegReductionPriorityQueue *PQ =
2007    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2008  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
2009  PQ->setScheduleDAG(SD);
2010  return SD;
2011}
2012
2013llvm::ScheduleDAGSDNodes *
2014llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2015  const TargetMachine &TM = IS->TM;
2016  const TargetInstrInfo *TII = TM.getInstrInfo();
2017  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2018  const TargetLowering *TLI = &IS->getTargetLowering();
2019
2020  HybridBURRPriorityQueue *PQ =
2021    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
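  // The hybrid scheduler runs bottom-up and makes use of latency information;
  // the two boolean arguments below select those modes.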
2022  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
2023  PQ->setScheduleDAG(SD);
2024  return SD;
2025}
2026
2027llvm::ScheduleDAGSDNodes *
2028llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2029  const TargetMachine &TM = IS->TM;
2030  const TargetInstrInfo *TII = TM.getInstrInfo();
2031  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2032  const TargetLowering *TLI = &IS->getTargetLowering();
2033
2034  ILPBURRPriorityQueue *PQ =
2035    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2036  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
2037  PQ->setScheduleDAG(SD);
2038  return SD;
2039}
2040