ScheduleDAGRRList.cpp revision f1b4eafbfec976f939ec0ea3e8acf91cef5363e3
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/Target/TargetRegisterInfo.h"
24#include "llvm/Target/TargetData.h"
25#include "llvm/Target/TargetMachine.h"
26#include "llvm/Target/TargetInstrInfo.h"
27#include "llvm/Target/TargetLowering.h"
28#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/raw_ostream.h"
34#include <climits>
35using namespace llvm;
36
37STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
38STATISTIC(NumUnfolds,    "Number of nodes unfolded");
39STATISTIC(NumDups,       "Number of duplicated nodes");
40STATISTIC(NumPRCopies,   "Number of physical register copies");
41
42static RegisterScheduler
43  burrListDAGScheduler("list-burr",
44                       "Bottom-up register reduction list scheduling",
45                       createBURRListDAGScheduler);
46static RegisterScheduler
47  tdrListrDAGScheduler("list-tdrr",
48                       "Top-down register reduction list scheduling",
49                       createTDRRListDAGScheduler);
50static RegisterScheduler
51  sourceListDAGScheduler("source",
52                         "Similar to list-burr but schedules in source "
53                         "order when possible",
54                         createSourceListDAGScheduler);
55
56static RegisterScheduler
57  hybridListDAGScheduler("list-hybrid",
58                         "Bottom-up register pressure aware list scheduling "
59                         "which tries to balance latency and register pressure",
60                         createHybridListDAGScheduler);
61
62static RegisterScheduler
63  ILPListDAGScheduler("list-ilp",
64                      "Bottom-up register pressure aware list scheduling "
65                      "which tries to balance ILP and register pressure",
66                      createILPListDAGScheduler);
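// Note: the schedulers above are registered with the SelectionDAGISel
// scheduler registry; each can normally be selected by name with the
// -pre-RA-sched=<name> command line option (e.g. -pre-RA-sched=list-burr),
// unless the target overrides the default choice.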
67
68namespace {
69//===----------------------------------------------------------------------===//
70/// ScheduleDAGRRList - The actual register reduction list scheduler
71/// implementation.  This supports both top-down and bottom-up scheduling.
72///
73class ScheduleDAGRRList : public ScheduleDAGSDNodes {
74private:
75  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
76  /// it is top-down.
77  bool isBottomUp;
78
79  /// NeedLatency - True if the scheduler will make use of latency information.
80  ///
81  bool NeedLatency;
82
83  /// AvailableQueue - The priority queue to use for the available SUnits.
84  SchedulingPriorityQueue *AvailableQueue;
85
  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other node
  /// that modifies the registers can be scheduled.
89  unsigned NumLiveRegs;
90  std::vector<SUnit*> LiveRegDefs;
91  std::vector<unsigned> LiveRegCycles;
92
93  /// Topo - A topological ordering for SUnits which permits fast IsReachable
94  /// and similar queries.
95  ScheduleDAGTopologicalSort Topo;
96
97public:
98  ScheduleDAGRRList(MachineFunction &mf,
99                    bool isbottomup, bool needlatency,
100                    SchedulingPriorityQueue *availqueue)
101    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
102      AvailableQueue(availqueue), Topo(SUnits) {
103    }
104
105  ~ScheduleDAGRRList() {
106    delete AvailableQueue;
107  }
108
109  void Schedule();
110
111  /// IsReachable - Checks if SU is reachable from TargetSU.
112  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
113    return Topo.IsReachable(SU, TargetSU);
114  }
115
116  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
117  /// create a cycle.
118  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
119    return Topo.WillCreateCycle(SU, TargetSU);
120  }
121
  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
125  void AddPred(SUnit *SU, const SDep &D) {
126    Topo.AddPred(SU, D.getSUnit());
127    SU->addPred(D);
128  }
129
  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
133  void RemovePred(SUnit *SU, const SDep &D) {
134    Topo.RemovePred(SU, D.getSUnit());
135    SU->removePred(D);
136  }
137
138private:
139  void ReleasePred(SUnit *SU, const SDep *PredEdge);
140  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
141  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
142  void ReleaseSuccessors(SUnit *SU);
143  void CapturePred(SDep *PredEdge);
144  void ScheduleNodeBottomUp(SUnit*, unsigned);
145  void ScheduleNodeTopDown(SUnit*, unsigned);
146  void UnscheduleNodeBottomUp(SUnit*);
147  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
148  SUnit *CopyAndMoveSuccessors(SUnit*);
149  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
150                                const TargetRegisterClass*,
151                                const TargetRegisterClass*,
152                                SmallVector<SUnit*, 2>&);
153  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
154  void ListScheduleTopDown();
155  void ListScheduleBottomUp();
156
157
158  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
159  /// Updates the topological ordering if required.
160  SUnit *CreateNewSUnit(SDNode *N) {
161    unsigned NumSUnits = SUnits.size();
162    SUnit *NewNode = NewSUnit(N);
163    // Update the topological ordering.
164    if (NewNode->NodeNum >= NumSUnits)
165      Topo.InitDAGTopologicalSorting();
166    return NewNode;
167  }
168
169  /// CreateClone - Creates a new SUnit from an existing one.
170  /// Updates the topological ordering if required.
171  SUnit *CreateClone(SUnit *N) {
172    unsigned NumSUnits = SUnits.size();
173    SUnit *NewNode = Clone(N);
174    // Update the topological ordering.
175    if (NewNode->NodeNum >= NumSUnits)
176      Topo.InitDAGTopologicalSorting();
177    return NewNode;
178  }
179
180  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
181  /// need actual latency information but the hybrid scheduler does.
182  bool ForceUnitLatencies() const {
183    return !NeedLatency;
184  }
185};
186}  // end anonymous namespace
187
188
189/// Schedule - Schedule the DAG using list scheduling.
190void ScheduleDAGRRList::Schedule() {
191  DEBUG(dbgs()
192        << "********** List Scheduling BB#" << BB->getNumber()
193        << " '" << BB->getName() << "' **********\n");
194
195  NumLiveRegs = 0;
196  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
197  LiveRegCycles.resize(TRI->getNumRegs(), 0);
198
199  // Build the scheduling graph.
200  BuildSchedGraph(NULL);
201
202  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
203          SUnits[su].dumpAll(this));
204  Topo.InitDAGTopologicalSorting();
205
206  AvailableQueue->initNodes(SUnits);
207
208  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
209  if (isBottomUp)
210    ListScheduleBottomUp();
211  else
212    ListScheduleTopDown();
213
214  AvailableQueue->releaseState();
215}
216
217//===----------------------------------------------------------------------===//
218//  Bottom-Up Scheduling
219//===----------------------------------------------------------------------===//
220
221/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
222/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
223void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
224  SUnit *PredSU = PredEdge->getSUnit();
225
226#ifndef NDEBUG
227  if (PredSU->NumSuccsLeft == 0) {
228    dbgs() << "*** Scheduling failed! ***\n";
229    PredSU->dump(this);
230    dbgs() << " has been released too many times!\n";
231    llvm_unreachable(0);
232  }
233#endif
234  --PredSU->NumSuccsLeft;
235
236  if (!ForceUnitLatencies()) {
    // Update the predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
239    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
240  }
241
242  // If all the node's successors are scheduled, this node is ready
243  // to be scheduled. Ignore the special EntrySU node.
244  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
245    PredSU->isAvailable = true;
246    AvailableQueue->push(PredSU);
247  }
248}
249
250void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
251  // Bottom up: release predecessors
252  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
253       I != E; ++I) {
254    ReleasePred(SU, &*I);
255    if (I->isAssignedRegDep()) {
256      // This is a physical register dependency and it's impossible or
257      // expensive to copy the register. Make sure nothing that can
258      // clobber the register is scheduled between the predecessor and
259      // this node.
260      if (!LiveRegDefs[I->getReg()]) {
261        ++NumLiveRegs;
262        LiveRegDefs[I->getReg()] = I->getSUnit();
263        LiveRegCycles[I->getReg()] = CurCycle;
264      }
265    }
266  }
267}
268
269/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
270/// count of its predecessors. If a predecessor pending count is zero, add it to
271/// the Available queue.
272void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
273  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
274  DEBUG(SU->dump(this));
275
276#ifndef NDEBUG
277  if (CurCycle < SU->getHeight())
278    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
279#endif
280
281  // FIXME: Handle noop hazard.
282  SU->setHeightToAtLeast(CurCycle);
283  Sequence.push_back(SU);
284
285  AvailableQueue->ScheduledNode(SU);
286
287  ReleasePredecessors(SU, CurCycle);
288
289  // Release all the implicit physical register defs that are live.
290  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
291       I != E; ++I) {
292    if (I->isAssignedRegDep()) {
293      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
294        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
295        assert(LiveRegDefs[I->getReg()] == SU &&
296               "Physical register dependency violated?");
297        --NumLiveRegs;
298        LiveRegDefs[I->getReg()] = NULL;
299        LiveRegCycles[I->getReg()] = 0;
300      }
301    }
302  }
303
304  SU->isScheduled = true;
305}
306
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
/// them from AvailableQueue if necessary.
310void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
311  SUnit *PredSU = PredEdge->getSUnit();
312  if (PredSU->isAvailable) {
313    PredSU->isAvailable = false;
314    if (!PredSU->isPending)
315      AvailableQueue->remove(PredSU);
316  }
317
318  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
319  ++PredSU->NumSuccsLeft;
320}
321
/// UnscheduleNodeBottomUp - Remove the node from the schedule, updating its own
/// state and that of its predecessors to reflect the change.
324void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
325  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
326  DEBUG(SU->dump(this));
327
328  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
329       I != E; ++I) {
330    CapturePred(&*I);
331    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
332      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
333      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
334             "Physical register dependency violated?");
335      --NumLiveRegs;
336      LiveRegDefs[I->getReg()] = NULL;
337      LiveRegCycles[I->getReg()] = 0;
338    }
339  }
340
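  // With SU unscheduled, the physical registers it defines become live again
  // for their already-scheduled uses; restore them in LiveRegDefs and
  // LiveRegCycles.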
341  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
342       I != E; ++I) {
343    if (I->isAssignedRegDep()) {
344      if (!LiveRegDefs[I->getReg()]) {
345        LiveRegDefs[I->getReg()] = SU;
346        ++NumLiveRegs;
347      }
348      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
349        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
350    }
351  }
352
353  SU->setHeightDirty();
354  SU->isScheduled = false;
355  SU->isAvailable = true;
356  AvailableQueue->push(SU);
357  AvailableQueue->UnscheduledNode(SU);
358}
359
/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
362void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
363                                          unsigned &CurCycle) {
364  SUnit *OldSU = NULL;
365  while (CurCycle > BtCycle) {
366    OldSU = Sequence.back();
367    Sequence.pop_back();
368    if (SU->isSucc(OldSU))
369      // Don't try to remove SU from AvailableQueue.
370      SU->isAvailable = false;
371    UnscheduleNodeBottomUp(OldSU);
372    --CurCycle;
373    AvailableQueue->setCurCycle(CurCycle);
374  }
375
376  assert(!SU->isSucc(OldSU) && "Something is wrong!");
377
378  ++NumBacktracks;
379}
380
381static bool isOperandOf(const SUnit *SU, SDNode *N) {
382  for (const SDNode *SUNode = SU->getNode(); SUNode;
383       SUNode = SUNode->getFlaggedNode()) {
384    if (SUNode->isOperandOf(N))
385      return true;
386  }
387  return false;
388}
389
390/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
391/// successors to the newly created node.
392SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
393  if (SU->getNode()->getFlaggedNode())
394    return NULL;
395
396  SDNode *N = SU->getNode();
397  if (!N)
398    return NULL;
399
400  SUnit *NewSU;
401  bool TryUnfold = false;
402  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
403    EVT VT = N->getValueType(i);
404    if (VT == MVT::Glue)
405      return NULL;
406    else if (VT == MVT::Other)
407      TryUnfold = true;
408  }
409  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
410    const SDValue &Op = N->getOperand(i);
411    EVT VT = Op.getNode()->getValueType(Op.getResNo());
412    if (VT == MVT::Glue)
413      return NULL;
414  }
415
416  if (TryUnfold) {
417    SmallVector<SDNode*, 2> NewNodes;
418    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
419      return NULL;
420
421    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
422    assert(NewNodes.size() == 2 && "Expected a load folding node!");
423
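    // unfoldMemoryOperand returns the new stand-alone load first (NewNodes[0]),
    // followed by the operation that consumes its result (NewNodes[1]).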
424    N = NewNodes[1];
425    SDNode *LoadNode = NewNodes[0];
426    unsigned NumVals = N->getNumValues();
427    unsigned OldNumVals = SU->getNode()->getNumValues();
428    for (unsigned i = 0; i != NumVals; ++i)
429      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
430    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
431                                   SDValue(LoadNode, 1));
432
    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but has different alignment or volatility.
436    bool isNewLoad = true;
437    SUnit *LoadSU;
438    if (LoadNode->getNodeId() != -1) {
439      LoadSU = &SUnits[LoadNode->getNodeId()];
440      isNewLoad = false;
441    } else {
442      LoadSU = CreateNewSUnit(LoadNode);
443      LoadNode->setNodeId(LoadSU->NodeNum);
444      ComputeLatency(LoadSU);
445    }
446
447    SUnit *NewSU = CreateNewSUnit(N);
448    assert(N->getNodeId() == -1 && "Node already inserted!");
449    N->setNodeId(NewSU->NodeNum);
450
451    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
452    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
453      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
454        NewSU->isTwoAddress = true;
455        break;
456      }
457    }
458    if (TID.isCommutable())
459      NewSU->isCommutable = true;
460    ComputeLatency(NewSU);
461
462    // Record all the edges to and from the old SU, by category.
463    SmallVector<SDep, 4> ChainPreds;
464    SmallVector<SDep, 4> ChainSuccs;
465    SmallVector<SDep, 4> LoadPreds;
466    SmallVector<SDep, 4> NodePreds;
467    SmallVector<SDep, 4> NodeSuccs;
468    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
469         I != E; ++I) {
470      if (I->isCtrl())
471        ChainPreds.push_back(*I);
472      else if (isOperandOf(I->getSUnit(), LoadNode))
473        LoadPreds.push_back(*I);
474      else
475        NodePreds.push_back(*I);
476    }
477    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
478         I != E; ++I) {
479      if (I->isCtrl())
480        ChainSuccs.push_back(*I);
481      else
482        NodeSuccs.push_back(*I);
483    }
484
485    // Now assign edges to the newly-created nodes.
486    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
487      const SDep &Pred = ChainPreds[i];
488      RemovePred(SU, Pred);
489      if (isNewLoad)
490        AddPred(LoadSU, Pred);
491    }
492    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
493      const SDep &Pred = LoadPreds[i];
494      RemovePred(SU, Pred);
495      if (isNewLoad)
496        AddPred(LoadSU, Pred);
497    }
498    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
499      const SDep &Pred = NodePreds[i];
500      RemovePred(SU, Pred);
501      AddPred(NewSU, Pred);
502    }
503    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
504      SDep D = NodeSuccs[i];
505      SUnit *SuccDep = D.getSUnit();
506      D.setSUnit(SU);
507      RemovePred(SuccDep, D);
508      D.setSUnit(NewSU);
509      AddPred(SuccDep, D);
510    }
511    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
512      SDep D = ChainSuccs[i];
513      SUnit *SuccDep = D.getSUnit();
514      D.setSUnit(SU);
515      RemovePred(SuccDep, D);
516      if (isNewLoad) {
517        D.setSUnit(LoadSU);
518        AddPred(SuccDep, D);
519      }
520    }
521
522    // Add a data dependency to reflect that NewSU reads the value defined
523    // by LoadSU.
524    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
525
526    if (isNewLoad)
527      AvailableQueue->addNode(LoadSU);
528    AvailableQueue->addNode(NewSU);
529
530    ++NumUnfolds;
531
532    if (NewSU->NumSuccsLeft == 0) {
533      NewSU->isAvailable = true;
534      return NewSU;
535    }
536    SU = NewSU;
537  }
538
539  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
540  NewSU = CreateClone(SU);
541
542  // New SUnit has the exact same predecessors.
543  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
544       I != E; ++I)
545    if (!I->isArtificial())
546      AddPred(NewSU, *I);
547
548  // Only copy scheduled successors. Cut them from old node's successor
549  // list and move them over.
550  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
551  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
552       I != E; ++I) {
553    if (I->isArtificial())
554      continue;
555    SUnit *SuccSU = I->getSUnit();
556    if (SuccSU->isScheduled) {
557      SDep D = *I;
558      D.setSUnit(NewSU);
559      AddPred(SuccSU, D);
560      D.setSUnit(SU);
561      DelDeps.push_back(std::make_pair(SuccSU, D));
562    }
563  }
564  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
565    RemovePred(DelDeps[i].first, DelDeps[i].second);
566
567  AvailableQueue->updateNode(SU);
568  AvailableQueue->addNode(NewSU);
569
570  ++NumDups;
571  return NewSU;
572}
573
574/// InsertCopiesAndMoveSuccs - Insert register copies and move all
575/// scheduled successors of the given SUnit to the last copy.
576void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
577                                               const TargetRegisterClass *DestRC,
578                                               const TargetRegisterClass *SrcRC,
579                                               SmallVector<SUnit*, 2> &Copies) {
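  // Create a pair of back-to-back copies: CopyFromSU copies the value defined
  // in Reg from SrcRC into DestRC, and CopyToSU copies it back into SrcRC.
  // The scheduled successors are rerouted below to read CopyToSU's result.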
580  SUnit *CopyFromSU = CreateNewSUnit(NULL);
581  CopyFromSU->CopySrcRC = SrcRC;
582  CopyFromSU->CopyDstRC = DestRC;
583
584  SUnit *CopyToSU = CreateNewSUnit(NULL);
585  CopyToSU->CopySrcRC = DestRC;
586  CopyToSU->CopyDstRC = SrcRC;
587
588  // Only copy scheduled successors. Cut them from old node's successor
589  // list and move them over.
590  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
591  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
592       I != E; ++I) {
593    if (I->isArtificial())
594      continue;
595    SUnit *SuccSU = I->getSUnit();
596    if (SuccSU->isScheduled) {
597      SDep D = *I;
598      D.setSUnit(CopyToSU);
599      AddPred(SuccSU, D);
600      DelDeps.push_back(std::make_pair(SuccSU, *I));
601    }
602  }
603  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
604    RemovePred(DelDeps[i].first, DelDeps[i].second);
605
606  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
607  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
608
609  AvailableQueue->updateNode(SU);
610  AvailableQueue->addNode(CopyFromSU);
611  AvailableQueue->addNode(CopyToSU);
612  Copies.push_back(CopyFromSU);
613  Copies.push_back(CopyToSU);
614
615  ++NumPRCopies;
616}
617
618/// getPhysicalRegisterVT - Returns the ValueType of the physical register
619/// definition of the specified node.
620/// FIXME: Move to SelectionDAG?
621static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
622                                 const TargetInstrInfo *TII) {
623  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
624  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
625  unsigned NumRes = TID.getNumDefs();
626  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
627    if (Reg == *ImpDef)
628      break;
629    ++NumRes;
630  }
631  return N->getValueType(NumRes);
632}
633
/// CheckForLiveRegDef - If the specified register def of the specified SUnit
/// clobbers any "live" register (or one of its aliases), record that register
/// in LRegs.
636static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
637                               std::vector<SUnit*> &LiveRegDefs,
638                               SmallSet<unsigned, 4> &RegAdded,
639                               SmallVector<unsigned, 4> &LRegs,
640                               const TargetRegisterInfo *TRI) {
641  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
642    if (RegAdded.insert(Reg))
643      LRegs.push_back(Reg);
644  }
645  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
646    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
647      if (RegAdded.insert(*Alias))
648        LRegs.push_back(*Alias);
649    }
650}
651
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specified node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
656bool ScheduleDAGRRList::
657DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
658  if (NumLiveRegs == 0)
659    return false;
660
661  SmallSet<unsigned, 4> RegAdded;
662  // If this node would clobber any "live" register, then it's not ready.
663  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
664       I != E; ++I) {
665    if (I->isAssignedRegDep())
666      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
667                         RegAdded, LRegs, TRI);
668  }
669
670  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
671    if (Node->getOpcode() == ISD::INLINEASM) {
672      // Inline asm can clobber physical defs.
673      unsigned NumOps = Node->getNumOperands();
674      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
675        --NumOps;  // Ignore the flag operand.
676
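      // Inline asm operands are grouped: each group starts with a flag word
      // that encodes the operand kind and the number of register operands
      // that follow it.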
677      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
678        unsigned Flags =
679          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
680        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
681
682        ++i; // Skip the ID value.
683        if (InlineAsm::isRegDefKind(Flags) ||
684            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
685          // Check for def of register or earlyclobber register.
686          for (; NumVals; --NumVals, ++i) {
687            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
688            if (TargetRegisterInfo::isPhysicalRegister(Reg))
689              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
690          }
691        } else
692          i += NumVals;
693      }
694      continue;
695    }
696
697    if (!Node->isMachineOpcode())
698      continue;
699    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
700    if (!TID.ImplicitDefs)
701      continue;
702    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
703      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
704  }
705
706
  // Okay, we now know all of the live registers that are defined by an
  // immediate predecessor.  It is ok to kill these registers if we are also
  // using them.
710  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
711       I != E; ++I) {
712    if (I->isAssignedRegDep() &&
713        LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
714      unsigned Reg = I->getReg();
715      if (RegAdded.erase(Reg))
716        LRegs.erase(std::find(LRegs.begin(), LRegs.end(), Reg));
717    }
718  }
719
720  return !LRegs.empty();
721}
722
723
724/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
725/// schedulers.
726void ScheduleDAGRRList::ListScheduleBottomUp() {
727  unsigned CurCycle = 0;
728
729  // Release any predecessors of the special Exit node.
730  ReleasePredecessors(&ExitSU, CurCycle);
731
732  // Add root to Available queue.
733  if (!SUnits.empty()) {
734    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
735    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
736    RootSU->isAvailable = true;
737    AvailableQueue->push(RootSU);
738  }
739
740  // While Available queue is not empty, grab the node with the highest
741  // priority. If it is not ready put it back.  Schedule the node.
742  SmallVector<SUnit*, 4> NotReady;
743  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
744  Sequence.reserve(SUnits.size());
745  while (!AvailableQueue->empty()) {
746    bool Delayed = false;
747    LRegsMap.clear();
748    SUnit *CurSU = AvailableQueue->pop();
749    while (CurSU) {
750      SmallVector<unsigned, 4> LRegs;
751      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
752        break;
753      Delayed = true;
754      LRegsMap.insert(std::make_pair(CurSU, LRegs));
755
756      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
757      NotReady.push_back(CurSU);
758      CurSU = AvailableQueue->pop();
759    }
760
761    // All candidates are delayed due to live physical reg dependencies.
762    // Try backtracking, code duplication, or inserting cross class copies
763    // to resolve it.
764    if (Delayed && !CurSU) {
765      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
766        SUnit *TrySU = NotReady[i];
767        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
768
769        // Try unscheduling up to the point where it's safe to schedule
770        // this node.
771        unsigned LiveCycle = CurCycle;
772        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
773          unsigned Reg = LRegs[j];
774          unsigned LCycle = LiveRegCycles[Reg];
775          LiveCycle = std::min(LiveCycle, LCycle);
776        }
777        SUnit *OldSU = Sequence[LiveCycle];
778        if (!WillCreateCycle(TrySU, OldSU))  {
779          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
780          // Force the current node to be scheduled before the node that
781          // requires the physical reg dep.
782          if (OldSU->isAvailable) {
783            OldSU->isAvailable = false;
784            AvailableQueue->remove(OldSU);
785          }
786          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
787                              /*Reg=*/0, /*isNormalMemory=*/false,
788                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors have been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
791          // available instead.
792          if (!TrySU->isAvailable)
793            CurSU = AvailableQueue->pop();
794          else {
795            CurSU = TrySU;
796            TrySU->isPending = false;
797            NotReady.erase(NotReady.begin()+i);
798          }
799          break;
800        }
801      }
802
803      if (!CurSU) {
        // Can't backtrack. If it's too expensive to copy the value, then try
        // duplicating the nodes that produce these "too expensive to copy"
        // values to break the dependency. If even that doesn't work, insert
        // cross class copies.
        // If it's not too expensive, i.e. cost != -1, issue copies.
809        SUnit *TrySU = NotReady[0];
810        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
811        assert(LRegs.size() == 1 && "Can't handle this yet!");
812        unsigned Reg = LRegs[0];
813        SUnit *LRDef = LiveRegDefs[Reg];
814        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
815        const TargetRegisterClass *RC =
816          TRI->getMinimalPhysRegClass(Reg, VT);
817        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
818
        // If the cross copy register class is null, then it must be possible to
        // copy the value directly. Do not try duplicating the def.
821        SUnit *NewDef = 0;
822        if (DestRC)
823          NewDef = CopyAndMoveSuccessors(LRDef);
824        else
825          DestRC = RC;
826        if (!NewDef) {
          // Issue copies; these can be expensive cross register class copies.
828          SmallVector<SUnit*, 2> Copies;
829          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
830          DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
831                       << " to SU #" << Copies.front()->NodeNum << "\n");
832          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
833                              /*Reg=*/0, /*isNormalMemory=*/false,
834                              /*isMustAlias=*/false,
835                              /*isArtificial=*/true));
836          NewDef = Copies.back();
837        }
838
839        DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
840                     << " to SU #" << TrySU->NodeNum << "\n");
841        LiveRegDefs[Reg] = NewDef;
842        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
843                             /*Reg=*/0, /*isNormalMemory=*/false,
844                             /*isMustAlias=*/false,
845                             /*isArtificial=*/true));
846        TrySU->isAvailable = false;
847        CurSU = NewDef;
848      }
849
850      assert(CurSU && "Unable to resolve live physical register dependencies!");
851    }
852
853    // Add the nodes that aren't ready back onto the available list.
854    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
855      NotReady[i]->isPending = false;
856      // May no longer be available due to backtracking.
857      if (NotReady[i]->isAvailable)
858        AvailableQueue->push(NotReady[i]);
859    }
860    NotReady.clear();
861
862    if (CurSU)
863      ScheduleNodeBottomUp(CurSU, CurCycle);
864    ++CurCycle;
865    AvailableQueue->setCurCycle(CurCycle);
866  }
867
  // Reverse the order since this is a bottom-up schedule.
869  std::reverse(Sequence.begin(), Sequence.end());
870
871#ifndef NDEBUG
872  VerifySchedule(isBottomUp);
873#endif
874}
875
876//===----------------------------------------------------------------------===//
877//  Top-Down Scheduling
878//===----------------------------------------------------------------------===//
879
880/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
881/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
882void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
883  SUnit *SuccSU = SuccEdge->getSUnit();
884
885#ifndef NDEBUG
886  if (SuccSU->NumPredsLeft == 0) {
887    dbgs() << "*** Scheduling failed! ***\n";
888    SuccSU->dump(this);
889    dbgs() << " has been released too many times!\n";
890    llvm_unreachable(0);
891  }
892#endif
893  --SuccSU->NumPredsLeft;
894
895  // If all the node's predecessors are scheduled, this node is ready
896  // to be scheduled. Ignore the special ExitSU node.
897  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
898    SuccSU->isAvailable = true;
899    AvailableQueue->push(SuccSU);
900  }
901}
902
903void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
904  // Top down: release successors
905  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
906       I != E; ++I) {
907    assert(!I->isAssignedRegDep() &&
908           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
909
910    ReleaseSucc(SU, &*I);
911  }
912}
913
914/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
915/// count of its successors. If a successor pending count is zero, add it to
916/// the Available queue.
917void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
918  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
919  DEBUG(SU->dump(this));
920
921  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
922  SU->setDepthToAtLeast(CurCycle);
923  Sequence.push_back(SU);
924
925  ReleaseSuccessors(SU);
926  SU->isScheduled = true;
927  AvailableQueue->ScheduledNode(SU);
928}
929
930/// ListScheduleTopDown - The main loop of list scheduling for top-down
931/// schedulers.
932void ScheduleDAGRRList::ListScheduleTopDown() {
933  unsigned CurCycle = 0;
934  AvailableQueue->setCurCycle(CurCycle);
935
936  // Release any successors of the special Entry node.
937  ReleaseSuccessors(&EntrySU);
  // Add all leaves to the Available queue.
939  // All leaves to Available queue.
940  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
941    // It is available if it has no predecessors.
942    if (SUnits[i].Preds.empty()) {
943      AvailableQueue->push(&SUnits[i]);
944      SUnits[i].isAvailable = true;
945    }
946  }
947
948  // While Available queue is not empty, grab the node with the highest
949  // priority. If it is not ready put it back.  Schedule the node.
950  Sequence.reserve(SUnits.size());
951  while (!AvailableQueue->empty()) {
952    SUnit *CurSU = AvailableQueue->pop();
953
954    if (CurSU)
955      ScheduleNodeTopDown(CurSU, CurCycle);
956    ++CurCycle;
957    AvailableQueue->setCurCycle(CurCycle);
958  }
959
960#ifndef NDEBUG
961  VerifySchedule(isBottomUp);
962#endif
963}
964
965
966//===----------------------------------------------------------------------===//
967//                RegReductionPriorityQueue Implementation
968//===----------------------------------------------------------------------===//
969//
970// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
971// to reduce register pressure.
972//
973namespace {
974  template<class SF>
975  class RegReductionPriorityQueue;
976
  /// bu_ls_rr_sort - Priority function for bottom up register pressure
  /// reduction scheduler.
979  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
980    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
981    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
982    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
983
984    bool operator()(const SUnit* left, const SUnit* right) const;
985  };
986
987  // td_ls_rr_sort - Priority function for top down register pressure reduction
988  // scheduler.
989  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
990    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
991    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
992    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
993
994    bool operator()(const SUnit* left, const SUnit* right) const;
995  };
996
997  // src_ls_rr_sort - Priority function for source order scheduler.
998  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
999    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
1000    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
1001      : SPQ(spq) {}
1002    src_ls_rr_sort(const src_ls_rr_sort &RHS)
1003      : SPQ(RHS.SPQ) {}
1004
1005    bool operator()(const SUnit* left, const SUnit* right) const;
1006  };
1007
1008  // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1009  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1010    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
1011    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
1012      : SPQ(spq) {}
1013    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1014      : SPQ(RHS.SPQ) {}
1015
1016    bool operator()(const SUnit* left, const SUnit* right) const;
1017  };
1018
1019  // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1020  // scheduler.
1021  struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1022    RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
1023    ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
1024      : SPQ(spq) {}
1025    ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1026      : SPQ(RHS.SPQ) {}
1027
1028    bool operator()(const SUnit* left, const SUnit* right) const;
1029  };
1030}  // end anonymous namespace
1031
1032/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1033/// Smaller number is the higher priority.
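/// The number for a node is the maximum of its data predecessors' numbers,
/// plus one for each additional predecessor that ties for that maximum; nodes
/// with no data predecessors get 1.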
1034static unsigned
1035CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1036  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1037  if (SethiUllmanNumber != 0)
1038    return SethiUllmanNumber;
1039
1040  unsigned Extra = 0;
1041  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1042       I != E; ++I) {
1043    if (I->isCtrl()) continue;  // ignore chain preds
1044    SUnit *PredSU = I->getSUnit();
1045    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1046    if (PredSethiUllman > SethiUllmanNumber) {
1047      SethiUllmanNumber = PredSethiUllman;
1048      Extra = 0;
1049    } else if (PredSethiUllman == SethiUllmanNumber)
1050      ++Extra;
1051  }
1052
1053  SethiUllmanNumber += Extra;
1054
1055  if (SethiUllmanNumber == 0)
1056    SethiUllmanNumber = 1;
1057
1058  return SethiUllmanNumber;
1059}
1060
1061namespace {
1062  template<class SF>
1063  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
1064    std::vector<SUnit*> Queue;
1065    SF Picker;
1066    unsigned CurQueueId;
1067    bool TracksRegPressure;
1068
1069  protected:
1070    // SUnits - The SUnits for the current graph.
1071    std::vector<SUnit> *SUnits;
1072
1073    MachineFunction &MF;
1074    const TargetInstrInfo *TII;
1075    const TargetRegisterInfo *TRI;
1076    const TargetLowering *TLI;
1077    ScheduleDAGRRList *scheduleDAG;
1078
1079    // SethiUllmanNumbers - The SethiUllman number for each node.
1080    std::vector<unsigned> SethiUllmanNumbers;
1081
1082    /// RegPressure - Tracking current reg pressure per register class.
1083    ///
1084    std::vector<unsigned> RegPressure;
1085
1086    /// RegLimit - Tracking the number of allocatable registers per register
1087    /// class.
1088    std::vector<unsigned> RegLimit;
1089
1090  public:
1091    RegReductionPriorityQueue(MachineFunction &mf,
1092                              bool tracksrp,
1093                              const TargetInstrInfo *tii,
1094                              const TargetRegisterInfo *tri,
1095                              const TargetLowering *tli)
1096      : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
1097        MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1098      if (TracksRegPressure) {
1099        unsigned NumRC = TRI->getNumRegClasses();
1100        RegLimit.resize(NumRC);
1101        RegPressure.resize(NumRC);
1102        std::fill(RegLimit.begin(), RegLimit.end(), 0);
1103        std::fill(RegPressure.begin(), RegPressure.end(), 0);
1104        for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1105               E = TRI->regclass_end(); I != E; ++I)
1106          RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
1107      }
1108    }
1109
1110    void initNodes(std::vector<SUnit> &sunits) {
1111      SUnits = &sunits;
1112      // Add pseudo dependency edges for two-address nodes.
1113      AddPseudoTwoAddrDeps();
1114      // Reroute edges to nodes with multiple uses.
1115      PrescheduleNodesWithMultipleUses();
1116      // Calculate node priorities.
1117      CalculateSethiUllmanNumbers();
1118    }
1119
1120    void addNode(const SUnit *SU) {
1121      unsigned SUSize = SethiUllmanNumbers.size();
1122      if (SUnits->size() > SUSize)
1123        SethiUllmanNumbers.resize(SUSize*2, 0);
1124      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1125    }
1126
1127    void updateNode(const SUnit *SU) {
1128      SethiUllmanNumbers[SU->NodeNum] = 0;
1129      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1130    }
1131
1132    void releaseState() {
1133      SUnits = 0;
1134      SethiUllmanNumbers.clear();
1135      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1136    }
1137
1138    unsigned getNodePriority(const SUnit *SU) const {
1139      assert(SU->NodeNum < SethiUllmanNumbers.size());
1140      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1141      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1142        // CopyToReg should be close to its uses to facilitate coalescing and
1143        // avoid spilling.
1144        return 0;
1145      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1146          Opc == TargetOpcode::SUBREG_TO_REG ||
1147          Opc == TargetOpcode::INSERT_SUBREG)
1148        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1149        // close to their uses to facilitate coalescing.
1150        return 0;
1151      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
        // If SU does not have a register use, i.e. it doesn't produce a value
        // that would be consumed (e.g. store), then it terminates a chain of
        // computation.  Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors, so that it doesn't lengthen
        // their live ranges.
1157        return 0xffff;
1158      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1159        // If SU does not have a register def, schedule it close to its uses
1160        // because it does not lengthen any live ranges.
1161        return 0;
1162      return SethiUllmanNumbers[SU->NodeNum];
1163    }
1164
1165    unsigned getNodeOrdering(const SUnit *SU) const {
1166      return scheduleDAG->DAG->GetOrdering(SU->getNode());
1167    }
1168
1169    bool empty() const { return Queue.empty(); }
1170
1171    void push(SUnit *U) {
1172      assert(!U->NodeQueueId && "Node in the queue already");
1173      U->NodeQueueId = ++CurQueueId;
1174      Queue.push_back(U);
1175    }
1176
1177    SUnit *pop() {
1178      if (empty()) return NULL;
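      // Linearly scan the queue for the entry Picker considers best, swap it
      // to the back, and pop it off.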
1179      std::vector<SUnit *>::iterator Best = Queue.begin();
1180      for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
1181           E = Queue.end(); I != E; ++I)
1182        if (Picker(*Best, *I))
1183          Best = I;
1184      SUnit *V = *Best;
1185      if (Best != prior(Queue.end()))
1186        std::swap(*Best, Queue.back());
1187      Queue.pop_back();
1188      V->NodeQueueId = 0;
1189      return V;
1190    }
1191
1192    void remove(SUnit *SU) {
1193      assert(!Queue.empty() && "Queue is empty!");
1194      assert(SU->NodeQueueId != 0 && "Not in queue!");
1195      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1196                                                   SU);
1197      if (I != prior(Queue.end()))
1198        std::swap(*I, Queue.back());
1199      Queue.pop_back();
1200      SU->NodeQueueId = 0;
1201    }
1202
1203    bool HighRegPressure(const SUnit *SU) const {
1204      if (!TLI)
1205        return false;
1206
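      // Scheduling SU (bottom-up) would make the values defined by its data
      // predecessors live; conservatively check whether any of those defs
      // would push a register class up to or past its limit.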
1207      for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1208           I != E; ++I) {
1209        if (I->isCtrl())
1210          continue;
1211        SUnit *PredSU = I->getSUnit();
1212        const SDNode *PN = PredSU->getNode();
1213        if (!PN->isMachineOpcode()) {
1214          if (PN->getOpcode() == ISD::CopyFromReg) {
1215            EVT VT = PN->getValueType(0);
1216            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1217            unsigned Cost = TLI->getRepRegClassCostFor(VT);
1218            if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1219              return true;
1220          }
1221          continue;
1222        }
1223        unsigned POpc = PN->getMachineOpcode();
1224        if (POpc == TargetOpcode::IMPLICIT_DEF)
1225          continue;
1226        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1227          EVT VT = PN->getOperand(0).getValueType();
1228          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1229          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1230          // Check if this increases register pressure of the specific register
1231          // class to the point where it would cause spills.
1232          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1233            return true;
1234          continue;
1235        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1236                   POpc == TargetOpcode::SUBREG_TO_REG) {
1237          EVT VT = PN->getValueType(0);
1238          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1239          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1240          // Check if this increases register pressure of the specific register
1241          // class to the point where it would cause spills.
1242          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1243            return true;
1244          continue;
1245        }
1246        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1247        for (unsigned i = 0; i != NumDefs; ++i) {
1248          EVT VT = PN->getValueType(i);
1249          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1250          if (RegPressure[RCId] >= RegLimit[RCId])
1251            return true; // Reg pressure already high.
1252          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1253          if (!PN->hasAnyUseOfValue(i))
1254            continue;
1255          // Check if this increases register pressure of the specific register
1256          // class to the point where it would cause spills.
1257          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1258            return true;
1259        }
1260      }
1261
1262      return false;
1263    }
1264
1265    void ScheduledNode(SUnit *SU) {
1266      if (!TracksRegPressure)
1267        return;
1268
1269      const SDNode *N = SU->getNode();
1270      if (!N->isMachineOpcode()) {
1271        if (N->getOpcode() != ISD::CopyToReg)
1272          return;
1273      } else {
1274        unsigned Opc = N->getMachineOpcode();
1275        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1276            Opc == TargetOpcode::INSERT_SUBREG ||
1277            Opc == TargetOpcode::SUBREG_TO_REG ||
1278            Opc == TargetOpcode::REG_SEQUENCE ||
1279            Opc == TargetOpcode::IMPLICIT_DEF)
1280          return;
1281      }
1282
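      // Bottom-up: scheduling SU makes the values it reads live. Bump the
      // pressure for each predecessor that has not had any of its uses
      // scheduled yet (NumSuccsLeft == NumSuccs); its defs become live here.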
1283      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1284           I != E; ++I) {
1285        if (I->isCtrl())
1286          continue;
1287        SUnit *PredSU = I->getSUnit();
1288        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1289          continue;
1290        const SDNode *PN = PredSU->getNode();
1291        if (!PN->isMachineOpcode()) {
1292          if (PN->getOpcode() == ISD::CopyFromReg) {
1293            EVT VT = PN->getValueType(0);
1294            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1295            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1296          }
1297          continue;
1298        }
1299        unsigned POpc = PN->getMachineOpcode();
1300        if (POpc == TargetOpcode::IMPLICIT_DEF)
1301          continue;
1302        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1303          EVT VT = PN->getOperand(0).getValueType();
1304          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1305          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1306          continue;
1307        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1308                   POpc == TargetOpcode::SUBREG_TO_REG) {
1309          EVT VT = PN->getValueType(0);
1310          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1311          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1312          continue;
1313        }
1314        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1315        for (unsigned i = 0; i != NumDefs; ++i) {
1316          EVT VT = PN->getValueType(i);
1317          if (!PN->hasAnyUseOfValue(i))
1318            continue;
1319          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1320          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1321        }
1322      }
1323
1324      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1325      // may transfer data dependencies to CopyToReg.
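      // SU's own defs die here: in a bottom-up schedule all of their uses have
      // already been scheduled, so release their register pressure.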
1326      if (SU->NumSuccs && N->isMachineOpcode()) {
1327        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1328        for (unsigned i = 0; i != NumDefs; ++i) {
1329          EVT VT = N->getValueType(i);
1330          if (!N->hasAnyUseOfValue(i))
1331            continue;
1332          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1333          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1334            // Register pressure tracking is imprecise. This can happen.
1335            RegPressure[RCId] = 0;
1336          else
1337            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1338        }
1339      }
1340
1341      dumpRegPressure();
1342    }
1343
1344    void UnscheduledNode(SUnit *SU) {
1345      if (!TracksRegPressure)
1346        return;
1347
1348      const SDNode *N = SU->getNode();
1349      if (!N->isMachineOpcode()) {
1350        if (N->getOpcode() != ISD::CopyToReg)
1351          return;
1352      } else {
1353        unsigned Opc = N->getMachineOpcode();
1354        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1355            Opc == TargetOpcode::INSERT_SUBREG ||
1356            Opc == TargetOpcode::SUBREG_TO_REG ||
1357            Opc == TargetOpcode::REG_SEQUENCE ||
1358            Opc == TargetOpcode::IMPLICIT_DEF)
1359          return;
1360      }
1361
1362      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1363           I != E; ++I) {
1364        if (I->isCtrl())
1365          continue;
1366        SUnit *PredSU = I->getSUnit();
1367        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1368          continue;
1369        const SDNode *PN = PredSU->getNode();
1370        if (!PN->isMachineOpcode()) {
1371          if (PN->getOpcode() == ISD::CopyFromReg) {
1372            EVT VT = PN->getValueType(0);
1373            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1374            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1375          }
1376          continue;
1377        }
1378        unsigned POpc = PN->getMachineOpcode();
1379        if (POpc == TargetOpcode::IMPLICIT_DEF)
1380          continue;
1381        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1382          EVT VT = PN->getOperand(0).getValueType();
1383          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1384          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1385          continue;
1386        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1387                   POpc == TargetOpcode::SUBREG_TO_REG) {
1388          EVT VT = PN->getValueType(0);
1389          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1390          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1391          continue;
1392        }
1393        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1394        for (unsigned i = 0; i != NumDefs; ++i) {
1395          EVT VT = PN->getValueType(i);
1396          if (!PN->hasAnyUseOfValue(i))
1397            continue;
1398          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1399          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1400            // Register pressure tracking is imprecise. This can happen.
1401            RegPressure[RCId] = 0;
1402          else
1403            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1404        }
1405      }
1406
1407      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1408      // may transfer data dependencies to CopyToReg.
1409      if (SU->NumSuccs && N->isMachineOpcode()) {
1410        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1411        for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1412          EVT VT = N->getValueType(i);
1413          if (VT == MVT::Glue || VT == MVT::Other)
1414            continue;
1415          if (!N->hasAnyUseOfValue(i))
1416            continue;
1417          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1418          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1419        }
1420      }
1421
1422      dumpRegPressure();
1423    }
1424
1425    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1426      scheduleDAG = scheduleDag;
1427    }
1428
1429    void dumpRegPressure() const {
1430      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1431             E = TRI->regclass_end(); I != E; ++I) {
1432        const TargetRegisterClass *RC = *I;
1433        unsigned Id = RC->getID();
1434        unsigned RP = RegPressure[Id];
1435        if (!RP) continue;
1436        DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1437              << '\n');
1438      }
1439    }
1440
1441  protected:
1442    bool canClobber(const SUnit *SU, const SUnit *Op);
1443    void AddPseudoTwoAddrDeps();
1444    void PrescheduleNodesWithMultipleUses();
1445    void CalculateSethiUllmanNumbers();
1446  };
1447
1448  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1449    BURegReductionPriorityQueue;
1450
1451  typedef RegReductionPriorityQueue<td_ls_rr_sort>
1452    TDRegReductionPriorityQueue;
1453
1454  typedef RegReductionPriorityQueue<src_ls_rr_sort>
1455    SrcRegReductionPriorityQueue;
1456
1457  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1458    HybridBURRPriorityQueue;
1459
1460  typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1461    ILPBURRPriorityQueue;
} // end anonymous namespace
1463
1464/// closestSucc - Returns the scheduled cycle of the successor which is
1465/// closest to the current cycle.
1466static unsigned closestSucc(const SUnit *SU) {
1467  unsigned MaxHeight = 0;
1468  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1469       I != E; ++I) {
1470    if (I->isCtrl()) continue;  // ignore chain succs
1471    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
1473    // to be at the same position.
1474    if (I->getSUnit()->getNode() &&
1475        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1476      Height = closestSucc(I->getSUnit())+1;
1477    if (Height > MaxHeight)
1478      MaxHeight = Height;
1479  }
1480  return MaxHeight;
1481}
1482
/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
1485static unsigned calcMaxScratches(const SUnit *SU) {
1486  unsigned Scratches = 0;
1487  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1488       I != E; ++I) {
1489    if (I->isCtrl()) continue;  // ignore chain preds
1490    Scratches++;
1491  }
1492  return Scratches;
1493}
1494
/// hasOnlyLiveOutUses - Return true if SU has at least one data successor and
/// all of them are CopyToReg nodes writing virtual registers. SU's defs are
/// then probably live-outs with no other uses, so SU should be scheduled
/// closer to the terminator.
1498static bool hasOnlyLiveOutUses(const SUnit *SU) {
1499  bool RetVal = false;
1500  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1501       I != E; ++I) {
1502    if (I->isCtrl()) continue;
1503    const SUnit *SuccSU = I->getSUnit();
1504    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
1505      unsigned Reg =
1506        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
1507      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1508        RetVal = true;
1509        continue;
1510      }
1511    }
1512    return false;
1513  }
1514  return RetVal;
1515}
1516
1517/// UnitsSharePred - Return true if the two scheduling units share a common
1518/// data predecessor.
1519static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
1520  SmallSet<const SUnit*, 4> Preds;
1521  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
1522       I != E; ++I) {
1523    if (I->isCtrl()) continue;  // ignore chain preds
1524    Preds.insert(I->getSUnit());
1525  }
1526  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
1527       I != E; ++I) {
1528    if (I->isCtrl()) continue;  // ignore chain preds
1529    if (Preds.count(I->getSUnit()))
1530      return true;
1531  }
1532  return false;
1533}
1534
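/// BURRSort - Priority comparison shared by the bottom-up comparators:
/// Sethi-Ullman priority first, then distance to the closest successor, then
/// the scratch register estimate, then height, depth, and finally queue id.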
1535template <typename RRSort>
1536static bool BURRSort(const SUnit *left, const SUnit *right,
1537                     const RegReductionPriorityQueue<RRSort> *SPQ) {
1538  unsigned LPriority = SPQ->getNodePriority(left);
1539  unsigned RPriority = SPQ->getNodePriority(right);
1540  if (LPriority != RPriority)
1541    return LPriority > RPriority;
1542
  // Try to schedule def + use closer together when Sethi-Ullman numbers are
  // the same.
1544  // e.g.
1545  // t1 = op t2, c1
1546  // t3 = op t4, c2
1547  //
1548  // and the following instructions are both ready.
1549  // t2 = op c3
1550  // t4 = op c4
1551  //
1552  // Then schedule t2 = op first.
1553  // i.e.
1554  // t4 = op c4
1555  // t2 = op c3
1556  // t1 = op t2, c1
1557  // t3 = op t4, c2
1558  //
1559  // This creates more short live intervals.
1560  unsigned LDist = closestSucc(left);
1561  unsigned RDist = closestSucc(right);
1562  if (LDist != RDist)
1563    return LDist < RDist;
1564
  // How many registers become live when the node is scheduled.
1566  unsigned LScratch = calcMaxScratches(left);
1567  unsigned RScratch = calcMaxScratches(right);
1568  if (LScratch != RScratch)
1569    return LScratch > RScratch;
1570
1571  if (left->getHeight() != right->getHeight())
1572    return left->getHeight() > right->getHeight();
1573
1574  if (left->getDepth() != right->getDepth())
1575    return left->getDepth() < right->getDepth();
1576
1577  assert(left->NodeQueueId && right->NodeQueueId &&
1578         "NodeQueueId cannot be zero");
1579  return (left->NodeQueueId > right->NodeQueueId);
1580}
1581
1582// Bottom up
1583bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1584  return BURRSort(left, right, SPQ);
1585}
1586
1587// Source order, otherwise bottom up.
1588bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1589  unsigned LOrder = SPQ->getNodeOrdering(left);
1590  unsigned ROrder = SPQ->getNodeOrdering(right);
1591
  // Prefer the node with the lower non-zero source order number; a node with
  // no ordering (order 0) ranks below one that has an ordering.
1594  if ((LOrder || ROrder) && LOrder != ROrder)
1595    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
1596
1597  return BURRSort(left, right, SPQ);
1598}
1599
1600bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
1601  if (left->isCall || right->isCall)
1602    // No way to compute latency of calls.
1603    return BURRSort(left, right, SPQ);
1604
1605  bool LHigh = SPQ->HighRegPressure(left);
1606  bool RHigh = SPQ->HighRegPressure(right);
1607  // Avoid causing spills. If register pressure is high, schedule for
1608  // register pressure reduction.
1609  if (LHigh && !RHigh)
1610    return true;
1611  else if (!LHigh && RHigh)
1612    return false;
1613  else if (!LHigh && !RHigh) {
1614    // If the two nodes share an operand and one of them has a single
1615    // use that is a live out copy, favor the one that is live out. Otherwise
1616    // it will be difficult to eliminate the copy if the instruction is a
1617    // loop induction variable update. e.g.
1618    // BB:
1619    // sub r1, r3, #1
1620    // str r0, [r2, r3]
1621    // mov r3, r1
1622    // cmp
1623    // bne BB
1624    bool SharePred = UnitsSharePred(left, right);
1625    // FIXME: Only adjust if BB is a loop back edge.
1626    // FIXME: What's the cost of a copy?
1627    int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
1628    int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
1629    int LHeight = (int)left->getHeight() - LBonus;
1630    int RHeight = (int)right->getHeight() - RBonus;
1631
1632    // Low register pressure situation, schedule for latency if possible.
1633    bool LStall = left->SchedulingPref == Sched::Latency &&
1634      (int)SPQ->getCurCycle() < LHeight;
1635    bool RStall = right->SchedulingPref == Sched::Latency &&
1636      (int)SPQ->getCurCycle() < RHeight;
    // If scheduling only one of the nodes will cause a pipeline stall, delay
    // it. If scheduling both of them will cause stalls, sort them by height.
1640    if (LStall) {
1641      if (!RStall)
1642        return true;
1643      if (LHeight != RHeight)
1644        return LHeight > RHeight;
1645    } else if (RStall)
1646      return false;
1647
1648    // If either node is scheduling for latency, sort them by height
1649    // and latency.
1650    if (left->SchedulingPref == Sched::Latency ||
1651        right->SchedulingPref == Sched::Latency) {
1652      if (LHeight != RHeight)
1653        return LHeight > RHeight;
1654      if (left->Latency != right->Latency)
1655        return left->Latency > right->Latency;
1656    }
1657  }
1658
1659  return BURRSort(left, right, SPQ);
1660}
1661
1662bool ilp_ls_rr_sort::operator()(const SUnit *left,
1663                                const SUnit *right) const {
1664  if (left->isCall || right->isCall)
1665    // No way to compute latency of calls.
1666    return BURRSort(left, right, SPQ);
1667
1668  bool LHigh = SPQ->HighRegPressure(left);
1669  bool RHigh = SPQ->HighRegPressure(right);
1670  // Avoid causing spills. If register pressure is high, schedule for
1671  // register pressure reduction.
1672  if (LHigh && !RHigh)
1673    return true;
1674  else if (!LHigh && RHigh)
1675    return false;
1676  else if (!LHigh && !RHigh) {
1677    // Low register pressure situation, schedule to maximize instruction level
1678    // parallelism.
    // Prefer the node with more data predecessors; scheduling it makes more
    // instructions available (bottom-up), which increases parallelism.
    if (left->NumPreds > right->NumPreds)
      return false;
    else if (left->NumPreds < right->NumPreds)
      return true;
1683  }
1684
1685  return BURRSort(left, right, SPQ);
1686}
1687
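/// canClobber - Return true if SU is a two-address instruction that ties one
/// of its operands to a value produced by Op, so SU may overwrite the
/// register holding Op's result.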
1688template<class SF>
1689bool
1690RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1691  if (SU->isTwoAddress) {
1692    unsigned Opc = SU->getNode()->getMachineOpcode();
1693    const TargetInstrDesc &TID = TII->get(Opc);
1694    unsigned NumRes = TID.getNumDefs();
1695    unsigned NumOps = TID.getNumOperands() - NumRes;
1696    for (unsigned i = 0; i != NumOps; ++i) {
1697      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1698        SDNode *DU = SU->getNode()->getOperand(i).getNode();
1699        if (DU->getNodeId() != -1 &&
1700            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1701          return true;
1702      }
1703    }
1704  }
1705  return false;
1706}
1707
1708/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1709/// physical register defs.
1710static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1711                                  const TargetInstrInfo *TII,
1712                                  const TargetRegisterInfo *TRI) {
1713  SDNode *N = SuccSU->getNode();
1714  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1715  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1716  assert(ImpDefs && "Caller should check hasPhysRegDefs");
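  // Walk SU's glued node sequence; SU clobbers SuccSU if any implicit def of
  // one of SU's machine nodes overlaps an implicitly defined physical register
  // of SuccSU's node that actually has uses.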
1717  for (const SDNode *SUNode = SU->getNode(); SUNode;
1718       SUNode = SUNode->getFlaggedNode()) {
1719    if (!SUNode->isMachineOpcode())
1720      continue;
1721    const unsigned *SUImpDefs =
1722      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
1723    if (!SUImpDefs)
1724      return false;
1725    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1726      EVT VT = N->getValueType(i);
1727      if (VT == MVT::Glue || VT == MVT::Other)
1728        continue;
1729      if (!N->hasAnyUseOfValue(i))
1730        continue;
1731      unsigned Reg = ImpDefs[i - NumDefs];
1732      for (;*SUImpDefs; ++SUImpDefs) {
1733        unsigned SUReg = *SUImpDefs;
1734        if (TRI->regsOverlap(Reg, SUReg))
1735          return true;
1736      }
1737    }
1738  }
1739  return false;
1740}
1741
1742/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1743/// are not handled well by the general register pressure reduction
1744/// heuristics. When presented with code like this:
1745///
1746///      N
1747///    / |
1748///   /  |
1749///  U  store
1750///  |
1751/// ...
1752///
1753/// the heuristics tend to push the store up, but since the
1754/// operand of the store has another use (U), this would increase
1755/// the length of that other use (the U->N edge).
1756///
1757/// This function transforms code like the above to route U's
1758/// dependence through the store when possible, like this:
1759///
1760///      N
1761///      ||
1762///      ||
1763///     store
1764///       |
1765///       U
1766///       |
1767///      ...
1768///
1769/// This results in the store being scheduled immediately
1770/// after N, which shortens the U->N live range, reducing
1771/// register pressure.
1772///
1773template<class SF>
1774void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1775  // Visit all the nodes in topological order, working top-down.
1776  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1777    SUnit *SU = &(*SUnits)[i];
1778    // For now, only look at nodes with no data successors, such as stores.
1779    // These are especially important, due to the heuristics in
1780    // getNodePriority for nodes with no data successors.
1781    if (SU->NumSuccs != 0)
1782      continue;
1783    // For now, only look at nodes with exactly one data predecessor.
1784    if (SU->NumPreds != 1)
1785      continue;
1786    // Avoid prescheduling copies to virtual registers, which don't behave
1787    // like other nodes from the perspective of scheduling heuristics.
1788    if (SDNode *N = SU->getNode())
1789      if (N->getOpcode() == ISD::CopyToReg &&
1790          TargetRegisterInfo::isVirtualRegister
1791            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1792        continue;
1793
1794    // Locate the single data predecessor.
1795    SUnit *PredSU = 0;
1796    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1797         EE = SU->Preds.end(); II != EE; ++II)
1798      if (!II->isCtrl()) {
1799        PredSU = II->getSUnit();
1800        break;
1801      }
    assert(PredSU && "Expected a data predecessor");
1803
1804    // Don't rewrite edges that carry physregs, because that requires additional
1805    // support infrastructure.
1806    if (PredSU->hasPhysRegDefs)
1807      continue;
1808    // Short-circuit the case where SU is PredSU's only data successor.
1809    if (PredSU->NumSuccs == 1)
1810      continue;
    // Avoid prescheduling copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
1813    if (SDNode *N = SU->getNode())
1814      if (N->getOpcode() == ISD::CopyFromReg &&
1815          TargetRegisterInfo::isVirtualRegister
1816            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1817        continue;
1818
1819    // Perform checks on the successors of PredSU.
1820    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1821         EE = PredSU->Succs.end(); II != EE; ++II) {
1822      SUnit *PredSuccSU = II->getSUnit();
1823      if (PredSuccSU == SU) continue;
1824      // If PredSU has another successor with no data successors, for
1825      // now don't attempt to choose either over the other.
1826      if (PredSuccSU->NumSuccs == 0)
1827        goto outer_loop_continue;
1828      // Don't break physical register dependencies.
1829      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1830        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1831          goto outer_loop_continue;
1832      // Don't introduce graph cycles.
1833      if (scheduleDAG->IsReachable(SU, PredSuccSU))
1834        goto outer_loop_continue;
1835    }
1836
1837    // Ok, the transformation is safe and the heuristics suggest it is
1838    // profitable. Update the graph.
1839    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
1840                 << " next to PredSU #" << PredSU->NodeNum
1841                 << " to guide scheduling in the presence of multiple uses\n");
1842    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1843      SDep Edge = PredSU->Succs[i];
1844      assert(!Edge.isAssignedRegDep());
1845      SUnit *SuccSU = Edge.getSUnit();
1846      if (SuccSU != SU) {
1847        Edge.setSUnit(PredSU);
1848        scheduleDAG->RemovePred(SuccSU, Edge);
1849        scheduleDAG->AddPred(SU, Edge);
1850        Edge.setSUnit(SU);
1851        scheduleDAG->AddPred(SuccSU, Edge);
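        // RemovePred erased the old edge from PredSU->Succs, shifting the
        // remaining entries down, so back up and revisit the current index.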
1852        --i;
1853      }
1854    }
1855  outer_loop_continue:;
1856  }
1857}
1858
/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
1861/// node (if it won't create a cycle) so the two-address one will be scheduled
1862/// first (lower in the schedule). If both nodes are two-address, favor the
1863/// one that has a CopyToReg use (more likely to be a loop induction update).
1864/// If both are two-address, but one is commutable while the other is not
1865/// commutable, favor the one that's not commutable.
1866template<class SF>
1867void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1868  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1869    SUnit *SU = &(*SUnits)[i];
1870    if (!SU->isTwoAddress)
1871      continue;
1872
1873    SDNode *Node = SU->getNode();
1874    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
1875      continue;
1876
1877    bool isLiveOut = hasOnlyLiveOutUses(SU);
1878    unsigned Opc = Node->getMachineOpcode();
1879    const TargetInstrDesc &TID = TII->get(Opc);
1880    unsigned NumRes = TID.getNumDefs();
1881    unsigned NumOps = TID.getNumOperands() - NumRes;
1882    for (unsigned j = 0; j != NumOps; ++j) {
1883      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1884        continue;
1885      SDNode *DU = SU->getNode()->getOperand(j).getNode();
1886      if (DU->getNodeId() == -1)
1887        continue;
1888      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1889      if (!DUSU) continue;
1890      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1891           E = DUSU->Succs.end(); I != E; ++I) {
1892        if (I->isCtrl()) continue;
1893        SUnit *SuccSU = I->getSUnit();
1894        if (SuccSU == SU)
1895          continue;
1896        // Be conservative. Ignore if nodes aren't at roughly the same
1897        // depth and height.
1898        if (SuccSU->getHeight() < SU->getHeight() &&
1899            (SU->getHeight() - SuccSU->getHeight()) > 1)
1900          continue;
1901        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1902        // constrains whatever is using the copy, instead of the copy
1903        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
1905        while (SuccSU->Succs.size() == 1 &&
1906               SuccSU->getNode()->isMachineOpcode() &&
1907               SuccSU->getNode()->getMachineOpcode() ==
1908                 TargetOpcode::COPY_TO_REGCLASS)
1909          SuccSU = SuccSU->Succs.front().getSUnit();
1910        // Don't constrain non-instruction nodes.
1911        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1912          continue;
1913        // Don't constrain nodes with physical register defs if the
1914        // predecessor can clobber them.
1915        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1916          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1917            continue;
1918        }
1919        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1920        // these may be coalesced away. We want them close to their uses.
1921        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1922        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
1923            SuccOpc == TargetOpcode::INSERT_SUBREG ||
1924            SuccOpc == TargetOpcode::SUBREG_TO_REG)
1925          continue;
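        // Add the pseudo edge only when it cannot create a cycle and the
        // heuristics favor SU: SuccSU does not itself tie the shared operand,
        // or SU has only live-out uses while SuccSU does not, or SU is not
        // commutable while SuccSU is.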
1926        if ((!canClobber(SuccSU, DUSU) ||
1927             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
1928             (!SU->isCommutable && SuccSU->isCommutable)) &&
1929            !scheduleDAG->IsReachable(SuccSU, SU)) {
1930          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
1931                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
1932          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1933                                        /*Reg=*/0, /*isNormalMemory=*/false,
1934                                        /*isMustAlias=*/false,
1935                                        /*isArtificial=*/true));
1936        }
1937      }
1938    }
1939  }
1940}
1941
1942/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1943/// scheduling units.
1944template<class SF>
1945void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1946  SethiUllmanNumbers.assign(SUnits->size(), 0);
1947
1948  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1949    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1950}
1951
/// LimitedSumOfUnscheduledPredsOfSuccs - Count the unscheduled predecessors
/// of the successors of the SUnit SU, stopping as soon as the count exceeds
/// the provided limit.
1955static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1956                                                    unsigned Limit) {
1957  unsigned Sum = 0;
1958  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1959       I != E; ++I) {
1960    const SUnit *SuccSU = I->getSUnit();
1961    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1962         EE = SuccSU->Preds.end(); II != EE; ++II) {
1963      SUnit *PredSU = II->getSUnit();
1964      if (!PredSU->isScheduled)
1965        if (++Sum > Limit)
1966          return Sum;
1967    }
1968  }
1969  return Sum;
1970}
1971
1972
1973// Top down
1974bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1975  unsigned LPriority = SPQ->getNodePriority(left);
1976  unsigned RPriority = SPQ->getNodePriority(right);
1977  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1978  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
1979  bool LIsFloater = LIsTarget && left->NumPreds == 0;
1980  bool RIsFloater = RIsTarget && right->NumPreds == 0;
1981  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1982  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1983
1984  if (left->NumSuccs == 0 && right->NumSuccs != 0)
1985    return false;
1986  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1987    return true;
1988
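  // Adjust the bonuses: floaters (machine nodes with no data predecessors) are
  // penalized so they are scheduled closer to their uses, and nodes with a
  // single data successor are favored.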
1989  if (LIsFloater)
1990    LBonus -= 2;
1991  if (RIsFloater)
1992    RBonus -= 2;
1993  if (left->NumSuccs == 1)
1994    LBonus += 2;
1995  if (right->NumSuccs == 1)
1996    RBonus += 2;
1997
1998  if (LPriority+LBonus != RPriority+RBonus)
1999    return LPriority+LBonus < RPriority+RBonus;
2000
2001  if (left->getDepth() != right->getDepth())
2002    return left->getDepth() < right->getDepth();
2003
2004  if (left->NumSuccsLeft != right->NumSuccsLeft)
2005    return left->NumSuccsLeft > right->NumSuccsLeft;
2006
2007  assert(left->NodeQueueId && right->NodeQueueId &&
2008         "NodeQueueId cannot be zero");
2009  return (left->NodeQueueId > right->NodeQueueId);
2010}
2011
2012//===----------------------------------------------------------------------===//
2013//                         Public Constructor Functions
2014//===----------------------------------------------------------------------===//
2015
2016llvm::ScheduleDAGSDNodes *
2017llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2018  const TargetMachine &TM = IS->TM;
2019  const TargetInstrInfo *TII = TM.getInstrInfo();
2020  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2021
2022  BURegReductionPriorityQueue *PQ =
2023    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2024  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
2025  PQ->setScheduleDAG(SD);
2026  return SD;
2027}
2028
2029llvm::ScheduleDAGSDNodes *
2030llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2031  const TargetMachine &TM = IS->TM;
2032  const TargetInstrInfo *TII = TM.getInstrInfo();
2033  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2034
2035  TDRegReductionPriorityQueue *PQ =
2036    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2037  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
2038  PQ->setScheduleDAG(SD);
2039  return SD;
2040}
2041
2042llvm::ScheduleDAGSDNodes *
2043llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2044  const TargetMachine &TM = IS->TM;
2045  const TargetInstrInfo *TII = TM.getInstrInfo();
2046  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2047
2048  SrcRegReductionPriorityQueue *PQ =
2049    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2050  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
2051  PQ->setScheduleDAG(SD);
2052  return SD;
2053}
2054
2055llvm::ScheduleDAGSDNodes *
2056llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2057  const TargetMachine &TM = IS->TM;
2058  const TargetInstrInfo *TII = TM.getInstrInfo();
2059  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2060  const TargetLowering *TLI = &IS->getTargetLowering();
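  // Register pressure aware scheduling needs TargetLowering to map value types
  // to representative register classes (getRepRegClassFor).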
2061
2062  HybridBURRPriorityQueue *PQ =
2063    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2064  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
2065  PQ->setScheduleDAG(SD);
2066  return SD;
2067}
2068
2069llvm::ScheduleDAGSDNodes *
2070llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
2071  const TargetMachine &TM = IS->TM;
2072  const TargetInstrInfo *TII = TM.getInstrInfo();
2073  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2074  const TargetLowering *TLI = &IS->getTargetLowering();
2075
2076  ILPBURRPriorityQueue *PQ =
2077    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2078  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
2079  PQ->setScheduleDAG(SD);
2080  return SD;
2081}
2082