1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/CodeGen/SchedulerRegistry.h"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
25#include "llvm/CodeGen/SelectionDAGISel.h"
26#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/InlineAsm.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/ErrorHandling.h"
30#include "llvm/Support/raw_ostream.h"
31#include "llvm/Target/TargetInstrInfo.h"
32#include "llvm/Target/TargetLowering.h"
33#include "llvm/Target/TargetRegisterInfo.h"
34#include "llvm/Target/TargetSubtargetInfo.h"
35#include <climits>
36using namespace llvm;
37
38#define DEBUG_TYPE "pre-RA-sched"
39
40STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
41STATISTIC(NumUnfolds,    "Number of nodes unfolded");
42STATISTIC(NumDups,       "Number of duplicated nodes");
43STATISTIC(NumPRCopies,   "Number of physical register copies");
44
45static RegisterScheduler
46  burrListDAGScheduler("list-burr",
47                       "Bottom-up register reduction list scheduling",
48                       createBURRListDAGScheduler);
49static RegisterScheduler
50  sourceListDAGScheduler("source",
51                         "Similar to list-burr but schedules in source "
52                         "order when possible",
53                         createSourceListDAGScheduler);
54
55static RegisterScheduler
56  hybridListDAGScheduler("list-hybrid",
57                         "Bottom-up register pressure aware list scheduling "
58                         "which tries to balance latency and register pressure",
59                         createHybridListDAGScheduler);
60
61static RegisterScheduler
62  ILPListDAGScheduler("list-ilp",
63                      "Bottom-up register pressure aware list scheduling "
64                      "which tries to balance ILP and register pressure",
65                      createILPListDAGScheduler);
66
67static cl::opt<bool> DisableSchedCycles(
68  "disable-sched-cycles", cl::Hidden, cl::init(false),
69  cl::desc("Disable cycle-level precision during preRA scheduling"));
70
71// Temporary sched=list-ilp flags until the heuristics are robust.
72// Some options are also available under sched=list-hybrid.
73static cl::opt<bool> DisableSchedRegPressure(
74  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
75  cl::desc("Disable regpressure priority in sched=list-ilp"));
76static cl::opt<bool> DisableSchedLiveUses(
77  "disable-sched-live-uses", cl::Hidden, cl::init(true),
78  cl::desc("Disable live use priority in sched=list-ilp"));
79static cl::opt<bool> DisableSchedVRegCycle(
80  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
81  cl::desc("Disable virtual register cycle interference checks"));
82static cl::opt<bool> DisableSchedPhysRegJoin(
83  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
84  cl::desc("Disable physreg def-use affinity"));
85static cl::opt<bool> DisableSchedStalls(
86  "disable-sched-stalls", cl::Hidden, cl::init(true),
87  cl::desc("Disable no-stall priority in sched=list-ilp"));
88static cl::opt<bool> DisableSchedCriticalPath(
89  "disable-sched-critical-path", cl::Hidden, cl::init(false),
90  cl::desc("Disable critical path priority in sched=list-ilp"));
91static cl::opt<bool> DisableSchedHeight(
92  "disable-sched-height", cl::Hidden, cl::init(false),
93  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
94static cl::opt<bool> Disable2AddrHack(
95  "disable-2addr-hack", cl::Hidden, cl::init(true),
96  cl::desc("Disable scheduler's two-address hack"));
97
98static cl::opt<int> MaxReorderWindow(
99  "max-sched-reorder", cl::Hidden, cl::init(6),
100  cl::desc("Number of instructions to allow ahead of the critical path "
101           "in sched=list-ilp"));
102
103static cl::opt<unsigned> AvgIPC(
104  "sched-avg-ipc", cl::Hidden, cl::init(1),
105  cl::desc("Average inst/cycle whan no target itinerary exists."));
106
107namespace {
108//===----------------------------------------------------------------------===//
109/// ScheduleDAGRRList - The actual register reduction list scheduler
110/// implementation.  This supports both top-down and bottom-up scheduling.
111///
112class ScheduleDAGRRList : public ScheduleDAGSDNodes {
113private:
114  /// NeedLatency - True if the scheduler will make use of latency information.
115  ///
116  bool NeedLatency;
117
118  /// AvailableQueue - The priority queue to use for the available SUnits.
119  SchedulingPriorityQueue *AvailableQueue;
120
121  /// PendingQueue - This contains all of the instructions whose operands have
122  /// been issued, but their results are not ready yet (due to the latency of
123  /// the operation).  Once the operands becomes available, the instruction is
124  /// added to the AvailableQueue.
125  std::vector<SUnit*> PendingQueue;
126
127  /// HazardRec - The hazard recognizer to use.
128  ScheduleHazardRecognizer *HazardRec;
129
130  /// CurCycle - The current scheduler state corresponds to this cycle.
131  unsigned CurCycle;
132
133  /// MinAvailableCycle - Cycle of the soonest available instruction.
134  unsigned MinAvailableCycle;
135
136  /// IssueCount - Count instructions issued in this cycle
137  /// Currently valid only for bottom-up scheduling.
138  unsigned IssueCount;
139
140  /// LiveRegDefs - A set of physical registers and their definition
141  /// that are "live". These nodes must be scheduled before any other nodes that
142  /// modifies the registers can be scheduled.
143  unsigned NumLiveRegs;
144  std::unique_ptr<SUnit*[]> LiveRegDefs;
145  std::unique_ptr<SUnit*[]> LiveRegGens;
146
147  // Collect interferences between physical register use/defs.
148  // Each interference is an SUnit and set of physical registers.
149  SmallVector<SUnit*, 4> Interferences;
150  typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
151  LRegsMapT LRegsMap;
152
153  /// Topo - A topological ordering for SUnits which permits fast IsReachable
154  /// and similar queries.
155  ScheduleDAGTopologicalSort Topo;
156
157  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
158  // DAG crawling.
159  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
160
161public:
162  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
163                    SchedulingPriorityQueue *availqueue,
164                    CodeGenOpt::Level OptLevel)
165    : ScheduleDAGSDNodes(mf),
166      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
167      Topo(SUnits, nullptr) {
168
169    const TargetSubtargetInfo &STI = mf.getSubtarget();
170    if (DisableSchedCycles || !NeedLatency)
171      HazardRec = new ScheduleHazardRecognizer();
172    else
173      HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
174  }
175
176  ~ScheduleDAGRRList() override {
177    delete HazardRec;
178    delete AvailableQueue;
179  }
180
181  void Schedule() override;
182
183  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
184
185  /// IsReachable - Checks if SU is reachable from TargetSU.
186  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
187    return Topo.IsReachable(SU, TargetSU);
188  }
189
190  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
191  /// create a cycle.
192  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
193    return Topo.WillCreateCycle(SU, TargetSU);
194  }
195
196  /// AddPred - adds a predecessor edge to SUnit SU.
197  /// This returns true if this is a new predecessor.
198  /// Updates the topological ordering if required.
199  void AddPred(SUnit *SU, const SDep &D) {
200    Topo.AddPred(SU, D.getSUnit());
201    SU->addPred(D);
202  }
203
204  /// RemovePred - removes a predecessor edge from SUnit SU.
205  /// This returns true if an edge was removed.
206  /// Updates the topological ordering if required.
207  void RemovePred(SUnit *SU, const SDep &D) {
208    Topo.RemovePred(SU, D.getSUnit());
209    SU->removePred(D);
210  }
211
212private:
213  bool isReady(SUnit *SU) {
214    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
215      AvailableQueue->isReady(SU);
216  }
217
218  void ReleasePred(SUnit *SU, const SDep *PredEdge);
219  void ReleasePredecessors(SUnit *SU);
220  void ReleasePending();
221  void AdvanceToCycle(unsigned NextCycle);
222  void AdvancePastStalls(SUnit *SU);
223  void EmitNode(SUnit *SU);
224  void ScheduleNodeBottomUp(SUnit*);
225  void CapturePred(SDep *PredEdge);
226  void UnscheduleNodeBottomUp(SUnit*);
227  void RestoreHazardCheckerBottomUp();
228  void BacktrackBottomUp(SUnit*, SUnit*);
229  SUnit *CopyAndMoveSuccessors(SUnit*);
230  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
231                                const TargetRegisterClass*,
232                                const TargetRegisterClass*,
233                                SmallVectorImpl<SUnit*>&);
234  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
235
236  void releaseInterferences(unsigned Reg = 0);
237
238  SUnit *PickNodeToScheduleBottomUp();
239  void ListScheduleBottomUp();
240
241  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
242  /// Updates the topological ordering if required.
243  SUnit *CreateNewSUnit(SDNode *N) {
244    unsigned NumSUnits = SUnits.size();
245    SUnit *NewNode = newSUnit(N);
246    // Update the topological ordering.
247    if (NewNode->NodeNum >= NumSUnits)
248      Topo.InitDAGTopologicalSorting();
249    return NewNode;
250  }
251
252  /// CreateClone - Creates a new SUnit from an existing one.
253  /// Updates the topological ordering if required.
254  SUnit *CreateClone(SUnit *N) {
255    unsigned NumSUnits = SUnits.size();
256    SUnit *NewNode = Clone(N);
257    // Update the topological ordering.
258    if (NewNode->NodeNum >= NumSUnits)
259      Topo.InitDAGTopologicalSorting();
260    return NewNode;
261  }
262
263  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
264  /// need actual latency information but the hybrid scheduler does.
265  bool forceUnitLatencies() const override {
266    return !NeedLatency;
267  }
268};
269}  // end anonymous namespace
270
271/// GetCostForDef - Looks up the register class and cost for a given definition.
272/// Typically this just means looking up the representative register class,
273/// but for untyped values (MVT::Untyped) it means inspecting the node's
274/// opcode to determine what register class is being generated.
275static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
276                          const TargetLowering *TLI,
277                          const TargetInstrInfo *TII,
278                          const TargetRegisterInfo *TRI,
279                          unsigned &RegClass, unsigned &Cost,
280                          const MachineFunction &MF) {
281  MVT VT = RegDefPos.GetValue();
282
283  // Special handling for untyped values.  These values can only come from
284  // the expansion of custom DAG-to-DAG patterns.
285  if (VT == MVT::Untyped) {
286    const SDNode *Node = RegDefPos.GetNode();
287
288    // Special handling for CopyFromReg of untyped values.
289    if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
290      unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
291      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
292      RegClass = RC->getID();
293      Cost = 1;
294      return;
295    }
296
297    unsigned Opcode = Node->getMachineOpcode();
298    if (Opcode == TargetOpcode::REG_SEQUENCE) {
299      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
300      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
301      RegClass = RC->getID();
302      Cost = 1;
303      return;
304    }
305
306    unsigned Idx = RegDefPos.GetIdx();
307    const MCInstrDesc Desc = TII->get(Opcode);
308    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
309    RegClass = RC->getID();
310    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
311    // better way to determine it.
312    Cost = 1;
313  } else {
314    RegClass = TLI->getRepRegClassFor(VT)->getID();
315    Cost = TLI->getRepRegClassCostFor(VT);
316  }
317}
318
319/// Schedule - Schedule the DAG using list scheduling.
320void ScheduleDAGRRList::Schedule() {
321  DEBUG(dbgs()
322        << "********** List Scheduling BB#" << BB->getNumber()
323        << " '" << BB->getName() << "' **********\n");
324
325  CurCycle = 0;
326  IssueCount = 0;
327  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
328  NumLiveRegs = 0;
329  // Allocate slots for each physical register, plus one for a special register
330  // to track the virtual resource of a calling sequence.
331  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
332  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
333  CallSeqEndForStart.clear();
334  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
335
336  // Build the scheduling graph.
337  BuildSchedGraph(nullptr);
338
339  DEBUG(for (SUnit &SU : SUnits)
340          SU.dumpAll(this));
341  Topo.InitDAGTopologicalSorting();
342
343  AvailableQueue->initNodes(SUnits);
344
345  HazardRec->Reset();
346
347  // Execute the actual scheduling loop.
348  ListScheduleBottomUp();
349
350  AvailableQueue->releaseState();
351
352  DEBUG({
353      dbgs() << "*** Final schedule ***\n";
354      dumpSchedule();
355      dbgs() << '\n';
356    });
357}
358
359//===----------------------------------------------------------------------===//
360//  Bottom-Up Scheduling
361//===----------------------------------------------------------------------===//
362
363/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
364/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
365void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
366  SUnit *PredSU = PredEdge->getSUnit();
367
368#ifndef NDEBUG
369  if (PredSU->NumSuccsLeft == 0) {
370    dbgs() << "*** Scheduling failed! ***\n";
371    PredSU->dump(this);
372    dbgs() << " has been released too many times!\n";
373    llvm_unreachable(nullptr);
374  }
375#endif
376  --PredSU->NumSuccsLeft;
377
378  if (!forceUnitLatencies()) {
379    // Updating predecessor's height. This is now the cycle when the
380    // predecessor can be scheduled without causing a pipeline stall.
381    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
382  }
383
384  // If all the node's successors are scheduled, this node is ready
385  // to be scheduled. Ignore the special EntrySU node.
386  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
387    PredSU->isAvailable = true;
388
389    unsigned Height = PredSU->getHeight();
390    if (Height < MinAvailableCycle)
391      MinAvailableCycle = Height;
392
393    if (isReady(PredSU)) {
394      AvailableQueue->push(PredSU);
395    }
396    // CapturePred and others may have left the node in the pending queue, avoid
397    // adding it twice.
398    else if (!PredSU->isPending) {
399      PredSU->isPending = true;
400      PendingQueue.push_back(PredSU);
401    }
402  }
403}
404
405/// IsChainDependent - Test if Outer is reachable from Inner through
406/// chain dependencies.
407static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
408                             unsigned NestLevel,
409                             const TargetInstrInfo *TII) {
410  SDNode *N = Outer;
411  for (;;) {
412    if (N == Inner)
413      return true;
414    // For a TokenFactor, examine each operand. There may be multiple ways
415    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
416    // most nesting in order to ensure that we find the corresponding match.
417    if (N->getOpcode() == ISD::TokenFactor) {
418      for (const SDValue &Op : N->op_values())
419        if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
420          return true;
421      return false;
422    }
423    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
424    if (N->isMachineOpcode()) {
425      if (N->getMachineOpcode() ==
426          (unsigned)TII->getCallFrameDestroyOpcode()) {
427        ++NestLevel;
428      } else if (N->getMachineOpcode() ==
429                 (unsigned)TII->getCallFrameSetupOpcode()) {
430        if (NestLevel == 0)
431          return false;
432        --NestLevel;
433      }
434    }
435    // Otherwise, find the chain and continue climbing.
436    for (const SDValue &Op : N->op_values())
437      if (Op.getValueType() == MVT::Other) {
438        N = Op.getNode();
439        goto found_chain_operand;
440      }
441    return false;
442  found_chain_operand:;
443    if (N->getOpcode() == ISD::EntryToken)
444      return false;
445  }
446}
447
448/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
449/// the corresponding (lowered) CALLSEQ_BEGIN node.
450///
451/// NestLevel and MaxNested are used in recursion to indcate the current level
452/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
453/// level seen so far.
454///
455/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
456/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
457static SDNode *
458FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
459                 const TargetInstrInfo *TII) {
460  for (;;) {
461    // For a TokenFactor, examine each operand. There may be multiple ways
462    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
463    // most nesting in order to ensure that we find the corresponding match.
464    if (N->getOpcode() == ISD::TokenFactor) {
465      SDNode *Best = nullptr;
466      unsigned BestMaxNest = MaxNest;
467      for (const SDValue &Op : N->op_values()) {
468        unsigned MyNestLevel = NestLevel;
469        unsigned MyMaxNest = MaxNest;
470        if (SDNode *New = FindCallSeqStart(Op.getNode(),
471                                           MyNestLevel, MyMaxNest, TII))
472          if (!Best || (MyMaxNest > BestMaxNest)) {
473            Best = New;
474            BestMaxNest = MyMaxNest;
475          }
476      }
477      assert(Best);
478      MaxNest = BestMaxNest;
479      return Best;
480    }
481    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
482    if (N->isMachineOpcode()) {
483      if (N->getMachineOpcode() ==
484          (unsigned)TII->getCallFrameDestroyOpcode()) {
485        ++NestLevel;
486        MaxNest = std::max(MaxNest, NestLevel);
487      } else if (N->getMachineOpcode() ==
488                 (unsigned)TII->getCallFrameSetupOpcode()) {
489        assert(NestLevel != 0);
490        --NestLevel;
491        if (NestLevel == 0)
492          return N;
493      }
494    }
495    // Otherwise, find the chain and continue climbing.
496    for (const SDValue &Op : N->op_values())
497      if (Op.getValueType() == MVT::Other) {
498        N = Op.getNode();
499        goto found_chain_operand;
500      }
501    return nullptr;
502  found_chain_operand:;
503    if (N->getOpcode() == ISD::EntryToken)
504      return nullptr;
505  }
506}
507
508/// Call ReleasePred for each predecessor, then update register live def/gen.
509/// Always update LiveRegDefs for a register dependence even if the current SU
510/// also defines the register. This effectively create one large live range
511/// across a sequence of two-address node. This is important because the
512/// entire chain must be scheduled together. Example:
513///
514/// flags = (3) add
515/// flags = (2) addc flags
516/// flags = (1) addc flags
517///
518/// results in
519///
520/// LiveRegDefs[flags] = 3
521/// LiveRegGens[flags] = 1
522///
523/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
524/// interference on flags.
525void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
526  // Bottom up: release predecessors
527  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
528       I != E; ++I) {
529    ReleasePred(SU, &*I);
530    if (I->isAssignedRegDep()) {
531      // This is a physical register dependency and it's impossible or
532      // expensive to copy the register. Make sure nothing that can
533      // clobber the register is scheduled between the predecessor and
534      // this node.
535      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
536      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
537             "interference on register dependence");
538      LiveRegDefs[I->getReg()] = I->getSUnit();
539      if (!LiveRegGens[I->getReg()]) {
540        ++NumLiveRegs;
541        LiveRegGens[I->getReg()] = SU;
542      }
543    }
544  }
545
546  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
547  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
548  // these nodes, to prevent other calls from being interscheduled with them.
549  unsigned CallResource = TRI->getNumRegs();
550  if (!LiveRegDefs[CallResource])
551    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
552      if (Node->isMachineOpcode() &&
553          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
554        unsigned NestLevel = 0;
555        unsigned MaxNest = 0;
556        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
557
558        SUnit *Def = &SUnits[N->getNodeId()];
559        CallSeqEndForStart[Def] = SU;
560
561        ++NumLiveRegs;
562        LiveRegDefs[CallResource] = Def;
563        LiveRegGens[CallResource] = SU;
564        break;
565      }
566}
567
568/// Check to see if any of the pending instructions are ready to issue.  If
569/// so, add them to the available queue.
570void ScheduleDAGRRList::ReleasePending() {
571  if (DisableSchedCycles) {
572    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
573    return;
574  }
575
576  // If the available queue is empty, it is safe to reset MinAvailableCycle.
577  if (AvailableQueue->empty())
578    MinAvailableCycle = UINT_MAX;
579
580  // Check to see if any of the pending instructions are ready to issue.  If
581  // so, add them to the available queue.
582  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
583    unsigned ReadyCycle = PendingQueue[i]->getHeight();
584    if (ReadyCycle < MinAvailableCycle)
585      MinAvailableCycle = ReadyCycle;
586
587    if (PendingQueue[i]->isAvailable) {
588      if (!isReady(PendingQueue[i]))
589          continue;
590      AvailableQueue->push(PendingQueue[i]);
591    }
592    PendingQueue[i]->isPending = false;
593    PendingQueue[i] = PendingQueue.back();
594    PendingQueue.pop_back();
595    --i; --e;
596  }
597}
598
599/// Move the scheduler state forward by the specified number of Cycles.
600void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
601  if (NextCycle <= CurCycle)
602    return;
603
604  IssueCount = 0;
605  AvailableQueue->setCurCycle(NextCycle);
606  if (!HazardRec->isEnabled()) {
607    // Bypass lots of virtual calls in case of long latency.
608    CurCycle = NextCycle;
609  }
610  else {
611    for (; CurCycle != NextCycle; ++CurCycle) {
612      HazardRec->RecedeCycle();
613    }
614  }
615  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
616  // available Q to release pending nodes at least once before popping.
617  ReleasePending();
618}
619
620/// Move the scheduler state forward until the specified node's dependents are
621/// ready and can be scheduled with no resource conflicts.
622void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
623  if (DisableSchedCycles)
624    return;
625
626  // FIXME: Nodes such as CopyFromReg probably should not advance the current
627  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
628  // has predecessors the cycle will be advanced when they are scheduled.
629  // But given the crude nature of modeling latency though such nodes, we
630  // currently need to treat these nodes like real instructions.
631  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
632
633  unsigned ReadyCycle = SU->getHeight();
634
635  // Bump CurCycle to account for latency. We assume the latency of other
636  // available instructions may be hidden by the stall (not a full pipe stall).
637  // This updates the hazard recognizer's cycle before reserving resources for
638  // this instruction.
639  AdvanceToCycle(ReadyCycle);
640
641  // Calls are scheduled in their preceding cycle, so don't conflict with
642  // hazards from instructions after the call. EmitNode will reset the
643  // scoreboard state before emitting the call.
644  if (SU->isCall)
645    return;
646
647  // FIXME: For resource conflicts in very long non-pipelined stages, we
648  // should probably skip ahead here to avoid useless scoreboard checks.
649  int Stalls = 0;
650  while (true) {
651    ScheduleHazardRecognizer::HazardType HT =
652      HazardRec->getHazardType(SU, -Stalls);
653
654    if (HT == ScheduleHazardRecognizer::NoHazard)
655      break;
656
657    ++Stalls;
658  }
659  AdvanceToCycle(CurCycle + Stalls);
660}
661
662/// Record this SUnit in the HazardRecognizer.
663/// Does not update CurCycle.
664void ScheduleDAGRRList::EmitNode(SUnit *SU) {
665  if (!HazardRec->isEnabled())
666    return;
667
668  // Check for phys reg copy.
669  if (!SU->getNode())
670    return;
671
672  switch (SU->getNode()->getOpcode()) {
673  default:
674    assert(SU->getNode()->isMachineOpcode() &&
675           "This target-independent node should not be scheduled.");
676    break;
677  case ISD::MERGE_VALUES:
678  case ISD::TokenFactor:
679  case ISD::LIFETIME_START:
680  case ISD::LIFETIME_END:
681  case ISD::CopyToReg:
682  case ISD::CopyFromReg:
683  case ISD::EH_LABEL:
684    // Noops don't affect the scoreboard state. Copies are likely to be
685    // removed.
686    return;
687  case ISD::INLINEASM:
688    // For inline asm, clear the pipeline state.
689    HazardRec->Reset();
690    return;
691  }
692  if (SU->isCall) {
693    // Calls are scheduled with their preceding instructions. For bottom-up
694    // scheduling, clear the pipeline state before emitting.
695    HazardRec->Reset();
696  }
697
698  HazardRec->EmitInstruction(SU);
699}
700
701static void resetVRegCycle(SUnit *SU);
702
703/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
704/// count of its predecessors. If a predecessor pending count is zero, add it to
705/// the Available queue.
706void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
707  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
708  DEBUG(SU->dump(this));
709
710#ifndef NDEBUG
711  if (CurCycle < SU->getHeight())
712    DEBUG(dbgs() << "   Height [" << SU->getHeight()
713          << "] pipeline stall!\n");
714#endif
715
716  // FIXME: Do not modify node height. It may interfere with
717  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
718  // node its ready cycle can aid heuristics, and after scheduling it can
719  // indicate the scheduled cycle.
720  SU->setHeightToAtLeast(CurCycle);
721
722  // Reserve resources for the scheduled instruction.
723  EmitNode(SU);
724
725  Sequence.push_back(SU);
726
727  AvailableQueue->scheduledNode(SU);
728
729  // If HazardRec is disabled, and each inst counts as one cycle, then
730  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
731  // PendingQueue for schedulers that implement HasReadyFilter.
732  if (!HazardRec->isEnabled() && AvgIPC < 2)
733    AdvanceToCycle(CurCycle + 1);
734
735  // Update liveness of predecessors before successors to avoid treating a
736  // two-address node as a live range def.
737  ReleasePredecessors(SU);
738
739  // Release all the implicit physical register defs that are live.
740  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
741       I != E; ++I) {
742    // LiveRegDegs[I->getReg()] != SU when SU is a two-address node.
743    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
744      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
745      --NumLiveRegs;
746      LiveRegDefs[I->getReg()] = nullptr;
747      LiveRegGens[I->getReg()] = nullptr;
748      releaseInterferences(I->getReg());
749    }
750  }
751  // Release the special call resource dependence, if this is the beginning
752  // of a call.
753  unsigned CallResource = TRI->getNumRegs();
754  if (LiveRegDefs[CallResource] == SU)
755    for (const SDNode *SUNode = SU->getNode(); SUNode;
756         SUNode = SUNode->getGluedNode()) {
757      if (SUNode->isMachineOpcode() &&
758          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
759        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
760        --NumLiveRegs;
761        LiveRegDefs[CallResource] = nullptr;
762        LiveRegGens[CallResource] = nullptr;
763        releaseInterferences(CallResource);
764      }
765    }
766
767  resetVRegCycle(SU);
768
769  SU->isScheduled = true;
770
771  // Conditions under which the scheduler should eagerly advance the cycle:
772  // (1) No available instructions
773  // (2) All pipelines full, so available instructions must have hazards.
774  //
775  // If HazardRec is disabled, the cycle was pre-advanced before calling
776  // ReleasePredecessors. In that case, IssueCount should remain 0.
777  //
778  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
779  if (HazardRec->isEnabled() || AvgIPC > 1) {
780    if (SU->getNode() && SU->getNode()->isMachineOpcode())
781      ++IssueCount;
782    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
783        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
784      AdvanceToCycle(CurCycle + 1);
785  }
786}
787
788/// CapturePred - This does the opposite of ReleasePred. Since SU is being
789/// unscheduled, incrcease the succ left count of its predecessors. Remove
790/// them from AvailableQueue if necessary.
791void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
792  SUnit *PredSU = PredEdge->getSUnit();
793  if (PredSU->isAvailable) {
794    PredSU->isAvailable = false;
795    if (!PredSU->isPending)
796      AvailableQueue->remove(PredSU);
797  }
798
799  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
800  ++PredSU->NumSuccsLeft;
801}
802
803/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
804/// its predecessor states to reflect the change.
805void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
806  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
807  DEBUG(SU->dump(this));
808
809  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
810       I != E; ++I) {
811    CapturePred(&*I);
812    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
813      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
814      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
815             "Physical register dependency violated?");
816      --NumLiveRegs;
817      LiveRegDefs[I->getReg()] = nullptr;
818      LiveRegGens[I->getReg()] = nullptr;
819      releaseInterferences(I->getReg());
820    }
821  }
822
823  // Reclaim the special call resource dependence, if this is the beginning
824  // of a call.
825  unsigned CallResource = TRI->getNumRegs();
826  for (const SDNode *SUNode = SU->getNode(); SUNode;
827       SUNode = SUNode->getGluedNode()) {
828    if (SUNode->isMachineOpcode() &&
829        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
830      ++NumLiveRegs;
831      LiveRegDefs[CallResource] = SU;
832      LiveRegGens[CallResource] = CallSeqEndForStart[SU];
833    }
834  }
835
836  // Release the special call resource dependence, if this is the end
837  // of a call.
838  if (LiveRegGens[CallResource] == SU)
839    for (const SDNode *SUNode = SU->getNode(); SUNode;
840         SUNode = SUNode->getGluedNode()) {
841      if (SUNode->isMachineOpcode() &&
842          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
843        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
844        --NumLiveRegs;
845        LiveRegDefs[CallResource] = nullptr;
846        LiveRegGens[CallResource] = nullptr;
847        releaseInterferences(CallResource);
848      }
849    }
850
851  for (auto &Succ : SU->Succs) {
852    if (Succ.isAssignedRegDep()) {
853      auto Reg = Succ.getReg();
854      if (!LiveRegDefs[Reg])
855        ++NumLiveRegs;
856      // This becomes the nearest def. Note that an earlier def may still be
857      // pending if this is a two-address node.
858      LiveRegDefs[Reg] = SU;
859
860      // Update LiveRegGen only if was empty before this unscheduling.
861      // This is to avoid incorrect updating LiveRegGen set in previous run.
862      if (!LiveRegGens[Reg]) {
863        // Find the successor with the lowest height.
864        LiveRegGens[Reg] = Succ.getSUnit();
865        for (auto &Succ2 : SU->Succs) {
866          if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
867              Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
868            LiveRegGens[Reg] = Succ2.getSUnit();
869        }
870      }
871    }
872  }
873  if (SU->getHeight() < MinAvailableCycle)
874    MinAvailableCycle = SU->getHeight();
875
876  SU->setHeightDirty();
877  SU->isScheduled = false;
878  SU->isAvailable = true;
879  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
880    // Don't make available until backtracking is complete.
881    SU->isPending = true;
882    PendingQueue.push_back(SU);
883  }
884  else {
885    AvailableQueue->push(SU);
886  }
887  AvailableQueue->unscheduledNode(SU);
888}
889
890/// After backtracking, the hazard checker needs to be restored to a state
891/// corresponding the current cycle.
892void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
893  HazardRec->Reset();
894
895  unsigned LookAhead = std::min((unsigned)Sequence.size(),
896                                HazardRec->getMaxLookAhead());
897  if (LookAhead == 0)
898    return;
899
900  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
901  unsigned HazardCycle = (*I)->getHeight();
902  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
903    SUnit *SU = *I;
904    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
905      HazardRec->RecedeCycle();
906    }
907    EmitNode(SU);
908  }
909}
910
911/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
912/// BTCycle in order to schedule a specific node.
913void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
914  SUnit *OldSU = Sequence.back();
915  while (true) {
916    Sequence.pop_back();
917    // FIXME: use ready cycle instead of height
918    CurCycle = OldSU->getHeight();
919    UnscheduleNodeBottomUp(OldSU);
920    AvailableQueue->setCurCycle(CurCycle);
921    if (OldSU == BtSU)
922      break;
923    OldSU = Sequence.back();
924  }
925
926  assert(!SU->isSucc(OldSU) && "Something is wrong!");
927
928  RestoreHazardCheckerBottomUp();
929
930  ReleasePending();
931
932  ++NumBacktracks;
933}
934
935static bool isOperandOf(const SUnit *SU, SDNode *N) {
936  for (const SDNode *SUNode = SU->getNode(); SUNode;
937       SUNode = SUNode->getGluedNode()) {
938    if (SUNode->isOperandOf(N))
939      return true;
940  }
941  return false;
942}
943
944/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
945/// successors to the newly created node.
946SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
947  SDNode *N = SU->getNode();
948  if (!N)
949    return nullptr;
950
951  if (SU->getNode()->getGluedNode())
952    return nullptr;
953
954  SUnit *NewSU;
955  bool TryUnfold = false;
956  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
957    MVT VT = N->getSimpleValueType(i);
958    if (VT == MVT::Glue)
959      return nullptr;
960    else if (VT == MVT::Other)
961      TryUnfold = true;
962  }
963  for (const SDValue &Op : N->op_values()) {
964    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
965    if (VT == MVT::Glue)
966      return nullptr;
967  }
968
969  if (TryUnfold) {
970    SmallVector<SDNode*, 2> NewNodes;
971    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
972      return nullptr;
973
974    // unfolding an x86 DEC64m operation results in store, dec, load which
975    // can't be handled here so quit
976    if (NewNodes.size() == 3)
977      return nullptr;
978
979    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
980    assert(NewNodes.size() == 2 && "Expected a load folding node!");
981
982    N = NewNodes[1];
983    SDNode *LoadNode = NewNodes[0];
984    unsigned NumVals = N->getNumValues();
985    unsigned OldNumVals = SU->getNode()->getNumValues();
986    for (unsigned i = 0; i != NumVals; ++i)
987      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
988    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
989                                   SDValue(LoadNode, 1));
990
991    // LoadNode may already exist. This can happen when there is another
992    // load from the same location and producing the same type of value
993    // but it has different alignment or volatileness.
994    bool isNewLoad = true;
995    SUnit *LoadSU;
996    if (LoadNode->getNodeId() != -1) {
997      LoadSU = &SUnits[LoadNode->getNodeId()];
998      isNewLoad = false;
999    } else {
1000      LoadSU = CreateNewSUnit(LoadNode);
1001      LoadNode->setNodeId(LoadSU->NodeNum);
1002
1003      InitNumRegDefsLeft(LoadSU);
1004      computeLatency(LoadSU);
1005    }
1006
1007    SUnit *NewSU = CreateNewSUnit(N);
1008    assert(N->getNodeId() == -1 && "Node already inserted!");
1009    N->setNodeId(NewSU->NodeNum);
1010
1011    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1012    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1013      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1014        NewSU->isTwoAddress = true;
1015        break;
1016      }
1017    }
1018    if (MCID.isCommutable())
1019      NewSU->isCommutable = true;
1020
1021    InitNumRegDefsLeft(NewSU);
1022    computeLatency(NewSU);
1023
1024    // Record all the edges to and from the old SU, by category.
1025    SmallVector<SDep, 4> ChainPreds;
1026    SmallVector<SDep, 4> ChainSuccs;
1027    SmallVector<SDep, 4> LoadPreds;
1028    SmallVector<SDep, 4> NodePreds;
1029    SmallVector<SDep, 4> NodeSuccs;
1030    for (SDep &Pred : SU->Preds) {
1031      if (Pred.isCtrl())
1032        ChainPreds.push_back(Pred);
1033      else if (isOperandOf(Pred.getSUnit(), LoadNode))
1034        LoadPreds.push_back(Pred);
1035      else
1036        NodePreds.push_back(Pred);
1037    }
1038    for (SDep &Succ : SU->Succs) {
1039      if (Succ.isCtrl())
1040        ChainSuccs.push_back(Succ);
1041      else
1042        NodeSuccs.push_back(Succ);
1043    }
1044
1045    // Now assign edges to the newly-created nodes.
1046    for (const SDep &Pred : ChainPreds) {
1047      RemovePred(SU, Pred);
1048      if (isNewLoad)
1049        AddPred(LoadSU, Pred);
1050    }
1051    for (const SDep &Pred : LoadPreds) {
1052      RemovePred(SU, Pred);
1053      if (isNewLoad)
1054        AddPred(LoadSU, Pred);
1055    }
1056    for (const SDep &Pred : NodePreds) {
1057      RemovePred(SU, Pred);
1058      AddPred(NewSU, Pred);
1059    }
1060    for (SDep D : NodeSuccs) {
1061      SUnit *SuccDep = D.getSUnit();
1062      D.setSUnit(SU);
1063      RemovePred(SuccDep, D);
1064      D.setSUnit(NewSU);
1065      AddPred(SuccDep, D);
1066      // Balance register pressure.
1067      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
1068          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1069        --NewSU->NumRegDefsLeft;
1070    }
1071    for (SDep D : ChainSuccs) {
1072      SUnit *SuccDep = D.getSUnit();
1073      D.setSUnit(SU);
1074      RemovePred(SuccDep, D);
1075      if (isNewLoad) {
1076        D.setSUnit(LoadSU);
1077        AddPred(SuccDep, D);
1078      }
1079    }
1080
1081    // Add a data dependency to reflect that NewSU reads the value defined
1082    // by LoadSU.
1083    SDep D(LoadSU, SDep::Data, 0);
1084    D.setLatency(LoadSU->Latency);
1085    AddPred(NewSU, D);
1086
1087    if (isNewLoad)
1088      AvailableQueue->addNode(LoadSU);
1089    AvailableQueue->addNode(NewSU);
1090
1091    ++NumUnfolds;
1092
1093    if (NewSU->NumSuccsLeft == 0) {
1094      NewSU->isAvailable = true;
1095      return NewSU;
1096    }
1097    SU = NewSU;
1098  }
1099
1100  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
1101  NewSU = CreateClone(SU);
1102
1103  // New SUnit has the exact same predecessors.
1104  for (SDep &Pred : SU->Preds)
1105    if (!Pred.isArtificial())
1106      AddPred(NewSU, Pred);
1107
1108  // Only copy scheduled successors. Cut them from old node's successor
1109  // list and move them over.
1110  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1111  for (SDep &Succ : SU->Succs) {
1112    if (Succ.isArtificial())
1113      continue;
1114    SUnit *SuccSU = Succ.getSUnit();
1115    if (SuccSU->isScheduled) {
1116      SDep D = Succ;
1117      D.setSUnit(NewSU);
1118      AddPred(SuccSU, D);
1119      D.setSUnit(SU);
1120      DelDeps.push_back(std::make_pair(SuccSU, D));
1121    }
1122  }
1123  for (auto &DelDep : DelDeps)
1124    RemovePred(DelDep.first, DelDep.second);
1125
1126  AvailableQueue->updateNode(SU);
1127  AvailableQueue->addNode(NewSU);
1128
1129  ++NumDups;
1130  return NewSU;
1131}
1132
1133/// InsertCopiesAndMoveSuccs - Insert register copies and move all
1134/// scheduled successors of the given SUnit to the last copy.
1135void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1136                                              const TargetRegisterClass *DestRC,
1137                                              const TargetRegisterClass *SrcRC,
1138                                              SmallVectorImpl<SUnit*> &Copies) {
1139  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1140  CopyFromSU->CopySrcRC = SrcRC;
1141  CopyFromSU->CopyDstRC = DestRC;
1142
1143  SUnit *CopyToSU = CreateNewSUnit(nullptr);
1144  CopyToSU->CopySrcRC = DestRC;
1145  CopyToSU->CopyDstRC = SrcRC;
1146
1147  // Only copy scheduled successors. Cut them from old node's successor
1148  // list and move them over.
1149  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1150  for (SDep &Succ : SU->Succs) {
1151    if (Succ.isArtificial())
1152      continue;
1153    SUnit *SuccSU = Succ.getSUnit();
1154    if (SuccSU->isScheduled) {
1155      SDep D = Succ;
1156      D.setSUnit(CopyToSU);
1157      AddPred(SuccSU, D);
1158      DelDeps.push_back(std::make_pair(SuccSU, Succ));
1159    }
1160    else {
1161      // Avoid scheduling the def-side copy before other successors. Otherwise
1162      // we could introduce another physreg interference on the copy and
1163      // continue inserting copies indefinitely.
1164      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1165    }
1166  }
1167  for (auto &DelDep : DelDeps)
1168    RemovePred(DelDep.first, DelDep.second);
1169
1170  SDep FromDep(SU, SDep::Data, Reg);
1171  FromDep.setLatency(SU->Latency);
1172  AddPred(CopyFromSU, FromDep);
1173  SDep ToDep(CopyFromSU, SDep::Data, 0);
1174  ToDep.setLatency(CopyFromSU->Latency);
1175  AddPred(CopyToSU, ToDep);
1176
1177  AvailableQueue->updateNode(SU);
1178  AvailableQueue->addNode(CopyFromSU);
1179  AvailableQueue->addNode(CopyToSU);
1180  Copies.push_back(CopyFromSU);
1181  Copies.push_back(CopyToSU);
1182
1183  ++NumPRCopies;
1184}
1185
1186/// getPhysicalRegisterVT - Returns the ValueType of the physical register
1187/// definition of the specified node.
1188/// FIXME: Move to SelectionDAG?
1189static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1190                                 const TargetInstrInfo *TII) {
1191  unsigned NumRes;
1192  if (N->getOpcode() == ISD::CopyFromReg) {
1193    // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1194    NumRes = 1;
1195  } else {
1196    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1197    assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1198    NumRes = MCID.getNumDefs();
1199    for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1200      if (Reg == *ImpDef)
1201        break;
1202      ++NumRes;
1203    }
1204  }
1205  return N->getSimpleValueType(NumRes);
1206}
1207
1208/// CheckForLiveRegDef - Return true and update live register vector if the
1209/// specified register def of the specified SUnit clobbers any "live" registers.
1210static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1211                               SUnit **LiveRegDefs,
1212                               SmallSet<unsigned, 4> &RegAdded,
1213                               SmallVectorImpl<unsigned> &LRegs,
1214                               const TargetRegisterInfo *TRI) {
1215  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1216
1217    // Check if Ref is live.
1218    if (!LiveRegDefs[*AliasI]) continue;
1219
1220    // Allow multiple uses of the same def.
1221    if (LiveRegDefs[*AliasI] == SU) continue;
1222
1223    // Add Reg to the set of interfering live regs.
1224    if (RegAdded.insert(*AliasI).second) {
1225      LRegs.push_back(*AliasI);
1226    }
1227  }
1228}
1229
1230/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1231/// by RegMask, and add them to LRegs.
1232static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1233                                     ArrayRef<SUnit*> LiveRegDefs,
1234                                     SmallSet<unsigned, 4> &RegAdded,
1235                                     SmallVectorImpl<unsigned> &LRegs) {
1236  // Look at all live registers. Skip Reg0 and the special CallResource.
1237  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1238    if (!LiveRegDefs[i]) continue;
1239    if (LiveRegDefs[i] == SU) continue;
1240    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1241    if (RegAdded.insert(i).second)
1242      LRegs.push_back(i);
1243  }
1244}
1245
1246/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1247static const uint32_t *getNodeRegMask(const SDNode *N) {
1248  for (const SDValue &Op : N->op_values())
1249    if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1250      return RegOp->getRegMask();
1251  return nullptr;
1252}
1253
1254/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1255/// scheduling of the given node to satisfy live physical register dependencies.
1256/// If the specific node is the last one that's available to schedule, do
1257/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1258bool ScheduleDAGRRList::
1259DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1260  if (NumLiveRegs == 0)
1261    return false;
1262
1263  SmallSet<unsigned, 4> RegAdded;
1264  // If this node would clobber any "live" register, then it's not ready.
1265  //
1266  // If SU is the currently live definition of the same register that it uses,
1267  // then we are free to schedule it.
1268  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1269       I != E; ++I) {
1270    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
1271      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(),
1272                         RegAdded, LRegs, TRI);
1273  }
1274
1275  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1276    if (Node->getOpcode() == ISD::INLINEASM) {
1277      // Inline asm can clobber physical defs.
1278      unsigned NumOps = Node->getNumOperands();
1279      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1280        --NumOps;  // Ignore the glue operand.
1281
1282      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1283        unsigned Flags =
1284          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1285        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1286
1287        ++i; // Skip the ID value.
1288        if (InlineAsm::isRegDefKind(Flags) ||
1289            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1290            InlineAsm::isClobberKind(Flags)) {
1291          // Check for def of register or earlyclobber register.
1292          for (; NumVals; --NumVals, ++i) {
1293            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1294            if (TargetRegisterInfo::isPhysicalRegister(Reg))
1295              CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1296          }
1297        } else
1298          i += NumVals;
1299      }
1300      continue;
1301    }
1302
1303    if (!Node->isMachineOpcode())
1304      continue;
1305    // If we're in the middle of scheduling a call, don't begin scheduling
1306    // another call. Also, don't allow any physical registers to be live across
1307    // the call.
1308    if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
1309      // Check the special calling-sequence resource.
1310      unsigned CallResource = TRI->getNumRegs();
1311      if (LiveRegDefs[CallResource]) {
1312        SDNode *Gen = LiveRegGens[CallResource]->getNode();
1313        while (SDNode *Glued = Gen->getGluedNode())
1314          Gen = Glued;
1315        if (!IsChainDependent(Gen, Node, 0, TII) &&
1316            RegAdded.insert(CallResource).second)
1317          LRegs.push_back(CallResource);
1318      }
1319    }
1320    if (const uint32_t *RegMask = getNodeRegMask(Node))
1321      CheckForLiveRegDefMasked(SU, RegMask,
1322                               makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1323                               RegAdded, LRegs);
1324
1325    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1326    if (!MCID.ImplicitDefs)
1327      continue;
1328    for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
1329      CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1330  }
1331
1332  return !LRegs.empty();
1333}
1334
1335void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1336  // Add the nodes that aren't ready back onto the available list.
1337  for (unsigned i = Interferences.size(); i > 0; --i) {
1338    SUnit *SU = Interferences[i-1];
1339    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1340    if (Reg) {
1341      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1342      if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
1343        continue;
1344    }
1345    SU->isPending = false;
1346    // The interfering node may no longer be available due to backtracking.
1347    // Furthermore, it may have been made available again, in which case it is
1348    // now already in the AvailableQueue.
1349    if (SU->isAvailable && !SU->NodeQueueId) {
1350      DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
1351      AvailableQueue->push(SU);
1352    }
1353    if (i < Interferences.size())
1354      Interferences[i-1] = Interferences.back();
1355    Interferences.pop_back();
1356    LRegsMap.erase(LRegsPos);
1357  }
1358}
1359
1360/// Return a node that can be scheduled in this cycle. Requirements:
1361/// (1) Ready: latency has been satisfied
1362/// (2) No Hazards: resources are available
1363/// (3) No Interferences: may unschedule to break register interferences.
1364SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1365  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1366  while (CurSU) {
1367    SmallVector<unsigned, 4> LRegs;
1368    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1369      break;
1370    DEBUG(dbgs() << "    Interfering reg " <<
1371          (LRegs[0] == TRI->getNumRegs() ? "CallResource"
1372           : TRI->getName(LRegs[0]))
1373           << " SU #" << CurSU->NodeNum << '\n');
1374    std::pair<LRegsMapT::iterator, bool> LRegsPair =
1375      LRegsMap.insert(std::make_pair(CurSU, LRegs));
1376    if (LRegsPair.second) {
1377      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
1378      Interferences.push_back(CurSU);
1379    }
1380    else {
1381      assert(CurSU->isPending && "Interferences are pending");
1382      // Update the interference with current live regs.
1383      LRegsPair.first->second = LRegs;
1384    }
1385    CurSU = AvailableQueue->pop();
1386  }
1387  if (CurSU)
1388    return CurSU;
1389
1390  // All candidates are delayed due to live physical reg dependencies.
1391  // Try backtracking, code duplication, or inserting cross class copies
1392  // to resolve it.
1393  for (SUnit *TrySU : Interferences) {
1394    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1395
1396    // Try unscheduling up to the point where it's safe to schedule
1397    // this node.
1398    SUnit *BtSU = nullptr;
1399    unsigned LiveCycle = UINT_MAX;
1400    for (unsigned Reg : LRegs) {
1401      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1402        BtSU = LiveRegGens[Reg];
1403        LiveCycle = BtSU->getHeight();
1404      }
1405    }
1406    if (!WillCreateCycle(TrySU, BtSU))  {
1407      // BacktrackBottomUp mutates Interferences!
1408      BacktrackBottomUp(TrySU, BtSU);
1409
1410      // Force the current node to be scheduled before the node that
1411      // requires the physical reg dep.
1412      if (BtSU->isAvailable) {
1413        BtSU->isAvailable = false;
1414        if (!BtSU->isPending)
1415          AvailableQueue->remove(BtSU);
1416      }
1417      DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
1418            << TrySU->NodeNum << ")\n");
1419      AddPred(TrySU, SDep(BtSU, SDep::Artificial));
1420
1421      // If one or more successors has been unscheduled, then the current
1422      // node is no longer available.
1423      if (!TrySU->isAvailable || !TrySU->NodeQueueId)
1424        CurSU = AvailableQueue->pop();
1425      else {
1426        // Available and in AvailableQueue
1427        AvailableQueue->remove(TrySU);
1428        CurSU = TrySU;
1429      }
1430      // Interferences has been mutated. We must break.
1431      break;
1432    }
1433  }
1434
1435  if (!CurSU) {
1436    // Can't backtrack. If it's too expensive to copy the value, then try
1437    // duplicate the nodes that produces these "too expensive to copy"
1438    // values to break the dependency. In case even that doesn't work,
1439    // insert cross class copies.
1440    // If it's not too expensive, i.e. cost != -1, issue copies.
1441    SUnit *TrySU = Interferences[0];
1442    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1443    assert(LRegs.size() == 1 && "Can't handle this yet!");
1444    unsigned Reg = LRegs[0];
1445    SUnit *LRDef = LiveRegDefs[Reg];
1446    MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1447    const TargetRegisterClass *RC =
1448      TRI->getMinimalPhysRegClass(Reg, VT);
1449    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1450
1451    // If cross copy register class is the same as RC, then it must be possible
1452    // copy the value directly. Do not try duplicate the def.
1453    // If cross copy register class is not the same as RC, then it's possible to
1454    // copy the value but it require cross register class copies and it is
1455    // expensive.
1456    // If cross copy register class is null, then it's not possible to copy
1457    // the value at all.
1458    SUnit *NewDef = nullptr;
1459    if (DestRC != RC) {
1460      NewDef = CopyAndMoveSuccessors(LRDef);
1461      if (!DestRC && !NewDef)
1462        report_fatal_error("Can't handle live physical register dependency!");
1463    }
1464    if (!NewDef) {
1465      // Issue copies, these can be expensive cross register class copies.
1466      SmallVector<SUnit*, 2> Copies;
1467      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1468      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1469            << " to SU #" << Copies.front()->NodeNum << "\n");
1470      AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
1471      NewDef = Copies.back();
1472    }
1473
1474    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1475          << " to SU #" << TrySU->NodeNum << "\n");
1476    LiveRegDefs[Reg] = NewDef;
1477    AddPred(NewDef, SDep(TrySU, SDep::Artificial));
1478    TrySU->isAvailable = false;
1479    CurSU = NewDef;
1480  }
1481  assert(CurSU && "Unable to resolve live physical register dependencies!");
1482  return CurSU;
1483}
1484
1485/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1486/// schedulers.
1487void ScheduleDAGRRList::ListScheduleBottomUp() {
1488  // Release any predecessors of the special Exit node.
1489  ReleasePredecessors(&ExitSU);
1490
1491  // Add root to Available queue.
1492  if (!SUnits.empty()) {
1493    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1494    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1495    RootSU->isAvailable = true;
1496    AvailableQueue->push(RootSU);
1497  }
1498
1499  // While Available queue is not empty, grab the node with the highest
1500  // priority. If it is not ready put it back.  Schedule the node.
1501  Sequence.reserve(SUnits.size());
1502  while (!AvailableQueue->empty() || !Interferences.empty()) {
1503    DEBUG(dbgs() << "\nExamining Available:\n";
1504          AvailableQueue->dump(this));
1505
1506    // Pick the best node to schedule taking all constraints into
1507    // consideration.
1508    SUnit *SU = PickNodeToScheduleBottomUp();
1509
1510    AdvancePastStalls(SU);
1511
1512    ScheduleNodeBottomUp(SU);
1513
1514    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1515      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1516      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1517      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1518    }
1519  }
1520
1521  // Reverse the order if it is bottom up.
1522  std::reverse(Sequence.begin(), Sequence.end());
1523
1524#ifndef NDEBUG
1525  VerifyScheduledSequence(/*isBottomUp=*/true);
1526#endif
1527}
1528
1529//===----------------------------------------------------------------------===//
1530//                RegReductionPriorityQueue Definition
1531//===----------------------------------------------------------------------===//
1532//
1533// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1534// to reduce register pressure.
1535//
1536namespace {
1537class RegReductionPQBase;
1538
1539struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1540  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1541};
1542
1543#ifndef NDEBUG
1544template<class SF>
1545struct reverse_sort : public queue_sort {
1546  SF &SortFunc;
1547  reverse_sort(SF &sf) : SortFunc(sf) {}
1548
1549  bool operator()(SUnit* left, SUnit* right) const {
1550    // reverse left/right rather than simply !SortFunc(left, right)
1551    // to expose different paths in the comparison logic.
1552    return SortFunc(right, left);
1553  }
1554};
1555#endif // NDEBUG
1556
1557/// bu_ls_rr_sort - Priority function for bottom up register pressure
1558// reduction scheduler.
1559struct bu_ls_rr_sort : public queue_sort {
1560  enum {
1561    IsBottomUp = true,
1562    HasReadyFilter = false
1563  };
1564
1565  RegReductionPQBase *SPQ;
1566  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1567
1568  bool operator()(SUnit* left, SUnit* right) const;
1569};
1570
1571// src_ls_rr_sort - Priority function for source order scheduler.
1572struct src_ls_rr_sort : public queue_sort {
1573  enum {
1574    IsBottomUp = true,
1575    HasReadyFilter = false
1576  };
1577
1578  RegReductionPQBase *SPQ;
1579  src_ls_rr_sort(RegReductionPQBase *spq)
1580    : SPQ(spq) {}
1581
1582  bool operator()(SUnit* left, SUnit* right) const;
1583};
1584
1585// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1586struct hybrid_ls_rr_sort : public queue_sort {
1587  enum {
1588    IsBottomUp = true,
1589    HasReadyFilter = false
1590  };
1591
1592  RegReductionPQBase *SPQ;
1593  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1594    : SPQ(spq) {}
1595
1596  bool isReady(SUnit *SU, unsigned CurCycle) const;
1597
1598  bool operator()(SUnit* left, SUnit* right) const;
1599};
1600
1601// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1602// scheduler.
1603struct ilp_ls_rr_sort : public queue_sort {
1604  enum {
1605    IsBottomUp = true,
1606    HasReadyFilter = false
1607  };
1608
1609  RegReductionPQBase *SPQ;
1610  ilp_ls_rr_sort(RegReductionPQBase *spq)
1611    : SPQ(spq) {}
1612
1613  bool isReady(SUnit *SU, unsigned CurCycle) const;
1614
1615  bool operator()(SUnit* left, SUnit* right) const;
1616};
1617
1618class RegReductionPQBase : public SchedulingPriorityQueue {
1619protected:
1620  std::vector<SUnit*> Queue;
1621  unsigned CurQueueId;
1622  bool TracksRegPressure;
1623  bool SrcOrder;
1624
1625  // SUnits - The SUnits for the current graph.
1626  std::vector<SUnit> *SUnits;
1627
1628  MachineFunction &MF;
1629  const TargetInstrInfo *TII;
1630  const TargetRegisterInfo *TRI;
1631  const TargetLowering *TLI;
1632  ScheduleDAGRRList *scheduleDAG;
1633
1634  // SethiUllmanNumbers - The SethiUllman number for each node.
1635  std::vector<unsigned> SethiUllmanNumbers;
1636
1637  /// RegPressure - Tracking current reg pressure per register class.
1638  ///
1639  std::vector<unsigned> RegPressure;
1640
1641  /// RegLimit - Tracking the number of allocatable registers per register
1642  /// class.
1643  std::vector<unsigned> RegLimit;
1644
1645public:
1646  RegReductionPQBase(MachineFunction &mf,
1647                     bool hasReadyFilter,
1648                     bool tracksrp,
1649                     bool srcorder,
1650                     const TargetInstrInfo *tii,
1651                     const TargetRegisterInfo *tri,
1652                     const TargetLowering *tli)
1653    : SchedulingPriorityQueue(hasReadyFilter),
1654      CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
1655      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) {
1656    if (TracksRegPressure) {
1657      unsigned NumRC = TRI->getNumRegClasses();
1658      RegLimit.resize(NumRC);
1659      RegPressure.resize(NumRC);
1660      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1661      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1662      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1663             E = TRI->regclass_end(); I != E; ++I)
1664        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
1665    }
1666  }
1667
1668  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1669    scheduleDAG = scheduleDag;
1670  }
1671
1672  ScheduleHazardRecognizer* getHazardRec() {
1673    return scheduleDAG->getHazardRec();
1674  }
1675
1676  void initNodes(std::vector<SUnit> &sunits) override;
1677
1678  void addNode(const SUnit *SU) override;
1679
1680  void updateNode(const SUnit *SU) override;
1681
1682  void releaseState() override {
1683    SUnits = nullptr;
1684    SethiUllmanNumbers.clear();
1685    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1686  }
1687
1688  unsigned getNodePriority(const SUnit *SU) const;
1689
1690  unsigned getNodeOrdering(const SUnit *SU) const {
1691    if (!SU->getNode()) return 0;
1692
1693    return SU->getNode()->getIROrder();
1694  }
1695
1696  bool empty() const override { return Queue.empty(); }
1697
1698  void push(SUnit *U) override {
1699    assert(!U->NodeQueueId && "Node in the queue already");
1700    U->NodeQueueId = ++CurQueueId;
1701    Queue.push_back(U);
1702  }
1703
1704  void remove(SUnit *SU) override {
1705    assert(!Queue.empty() && "Queue is empty!");
1706    assert(SU->NodeQueueId != 0 && "Not in queue!");
1707    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1708                                                 SU);
1709    if (I != std::prev(Queue.end()))
1710      std::swap(*I, Queue.back());
1711    Queue.pop_back();
1712    SU->NodeQueueId = 0;
1713  }
1714
1715  bool tracksRegPressure() const override { return TracksRegPressure; }
1716
1717  void dumpRegPressure() const;
1718
1719  bool HighRegPressure(const SUnit *SU) const;
1720
1721  bool MayReduceRegPressure(SUnit *SU) const;
1722
1723  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1724
1725  void scheduledNode(SUnit *SU) override;
1726
1727  void unscheduledNode(SUnit *SU) override;
1728
1729protected:
1730  bool canClobber(const SUnit *SU, const SUnit *Op);
1731  void AddPseudoTwoAddrDeps();
1732  void PrescheduleNodesWithMultipleUses();
1733  void CalculateSethiUllmanNumbers();
1734};
1735
1736template<class SF>
1737static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1738  std::vector<SUnit *>::iterator Best = Q.begin();
1739  for (std::vector<SUnit *>::iterator I = std::next(Q.begin()),
1740         E = Q.end(); I != E; ++I)
1741    if (Picker(*Best, *I))
1742      Best = I;
1743  SUnit *V = *Best;
1744  if (Best != std::prev(Q.end()))
1745    std::swap(*Best, Q.back());
1746  Q.pop_back();
1747  return V;
1748}
1749
1750template<class SF>
1751SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1752#ifndef NDEBUG
1753  if (DAG->StressSched) {
1754    reverse_sort<SF> RPicker(Picker);
1755    return popFromQueueImpl(Q, RPicker);
1756  }
1757#endif
1758  (void)DAG;
1759  return popFromQueueImpl(Q, Picker);
1760}
1761
1762template<class SF>
1763class RegReductionPriorityQueue : public RegReductionPQBase {
1764  SF Picker;
1765
1766public:
1767  RegReductionPriorityQueue(MachineFunction &mf,
1768                            bool tracksrp,
1769                            bool srcorder,
1770                            const TargetInstrInfo *tii,
1771                            const TargetRegisterInfo *tri,
1772                            const TargetLowering *tli)
1773    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1774                         tii, tri, tli),
1775      Picker(this) {}
1776
1777  bool isBottomUp() const override { return SF::IsBottomUp; }
1778
1779  bool isReady(SUnit *U) const override {
1780    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1781  }
1782
1783  SUnit *pop() override {
1784    if (Queue.empty()) return nullptr;
1785
1786    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1787    V->NodeQueueId = 0;
1788    return V;
1789  }
1790
1791#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1792  void dump(ScheduleDAG *DAG) const override {
1793    // Emulate pop() without clobbering NodeQueueIds.
1794    std::vector<SUnit*> DumpQueue = Queue;
1795    SF DumpPicker = Picker;
1796    while (!DumpQueue.empty()) {
1797      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1798      dbgs() << "Height " << SU->getHeight() << ": ";
1799      SU->dump(DAG);
1800    }
1801  }
1802#endif
1803};
1804
1805typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1806BURegReductionPriorityQueue;
1807
1808typedef RegReductionPriorityQueue<src_ls_rr_sort>
1809SrcRegReductionPriorityQueue;
1810
1811typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1812HybridBURRPriorityQueue;
1813
1814typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1815ILPBURRPriorityQueue;
1816} // end anonymous namespace
1817
1818//===----------------------------------------------------------------------===//
1819//           Static Node Priority for Register Pressure Reduction
1820//===----------------------------------------------------------------------===//
1821
1822// Check for special nodes that bypass scheduling heuristics.
1823// Currently this pushes TokenFactor nodes down, but may be used for other
1824// pseudo-ops as well.
1825//
1826// Return -1 to schedule right above left, 1 for left above right.
1827// Return 0 if no bias exists.
1828static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1829  bool LSchedLow = left->isScheduleLow;
1830  bool RSchedLow = right->isScheduleLow;
1831  if (LSchedLow != RSchedLow)
1832    return LSchedLow < RSchedLow ? 1 : -1;
1833  return 0;
1834}
1835
1836/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1837/// Smaller number is the higher priority.
1838static unsigned
1839CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1840  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1841  if (SethiUllmanNumber != 0)
1842    return SethiUllmanNumber;
1843
1844  unsigned Extra = 0;
1845  for (const SDep &Pred : SU->Preds) {
1846    if (Pred.isCtrl()) continue;  // ignore chain preds
1847    SUnit *PredSU = Pred.getSUnit();
1848    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1849    if (PredSethiUllman > SethiUllmanNumber) {
1850      SethiUllmanNumber = PredSethiUllman;
1851      Extra = 0;
1852    } else if (PredSethiUllman == SethiUllmanNumber)
1853      ++Extra;
1854  }
1855
1856  SethiUllmanNumber += Extra;
1857
1858  if (SethiUllmanNumber == 0)
1859    SethiUllmanNumber = 1;
1860
1861  return SethiUllmanNumber;
1862}
1863
1864/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1865/// scheduling units.
1866void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1867  SethiUllmanNumbers.assign(SUnits->size(), 0);
1868
1869  for (const SUnit &SU : *SUnits)
1870    CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
1871}
1872
1873void RegReductionPQBase::addNode(const SUnit *SU) {
1874  unsigned SUSize = SethiUllmanNumbers.size();
1875  if (SUnits->size() > SUSize)
1876    SethiUllmanNumbers.resize(SUSize*2, 0);
1877  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1878}
1879
1880void RegReductionPQBase::updateNode(const SUnit *SU) {
1881  SethiUllmanNumbers[SU->NodeNum] = 0;
1882  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1883}
1884
1885// Lower priority means schedule further down. For bottom-up scheduling, lower
1886// priority SUs are scheduled before higher priority SUs.
1887unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1888  assert(SU->NodeNum < SethiUllmanNumbers.size());
1889  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1890  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1891    // CopyToReg should be close to its uses to facilitate coalescing and
1892    // avoid spilling.
1893    return 0;
1894  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1895      Opc == TargetOpcode::SUBREG_TO_REG ||
1896      Opc == TargetOpcode::INSERT_SUBREG)
1897    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1898    // close to their uses to facilitate coalescing.
1899    return 0;
1900  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1901    // If SU does not have a register use, i.e. it doesn't produce a value
1902    // that would be consumed (e.g. store), then it terminates a chain of
1903    // computation.  Give it a large SethiUllman number so it will be
1904    // scheduled right before its predecessors that it doesn't lengthen
1905    // their live ranges.
1906    return 0xffff;
1907  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1908    // If SU does not have a register def, schedule it close to its uses
1909    // because it does not lengthen any live ranges.
1910    return 0;
1911#if 1
1912  return SethiUllmanNumbers[SU->NodeNum];
1913#else
1914  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1915  if (SU->isCallOp) {
1916    // FIXME: This assumes all of the defs are used as call operands.
1917    int NP = (int)Priority - SU->getNode()->getNumValues();
1918    return (NP > 0) ? NP : 0;
1919  }
1920  return Priority;
1921#endif
1922}
1923
1924//===----------------------------------------------------------------------===//
1925//                     Register Pressure Tracking
1926//===----------------------------------------------------------------------===//
1927
1928void RegReductionPQBase::dumpRegPressure() const {
1929#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1930  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1931         E = TRI->regclass_end(); I != E; ++I) {
1932    const TargetRegisterClass *RC = *I;
1933    unsigned Id = RC->getID();
1934    unsigned RP = RegPressure[Id];
1935    if (!RP) continue;
1936    DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
1937          << RegLimit[Id] << '\n');
1938  }
1939#endif
1940}
1941
1942bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1943  if (!TLI)
1944    return false;
1945
1946  for (const SDep &Pred : SU->Preds) {
1947    if (Pred.isCtrl())
1948      continue;
1949    SUnit *PredSU = Pred.getSUnit();
1950    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1951    // to cover the number of registers defined (they are all live).
1952    if (PredSU->NumRegDefsLeft == 0) {
1953      continue;
1954    }
1955    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1956         RegDefPos.IsValid(); RegDefPos.Advance()) {
1957      unsigned RCId, Cost;
1958      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
1959
1960      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1961        return true;
1962    }
1963  }
1964  return false;
1965}
1966
1967bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1968  const SDNode *N = SU->getNode();
1969
1970  if (!N->isMachineOpcode() || !SU->NumSuccs)
1971    return false;
1972
1973  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1974  for (unsigned i = 0; i != NumDefs; ++i) {
1975    MVT VT = N->getSimpleValueType(i);
1976    if (!N->hasAnyUseOfValue(i))
1977      continue;
1978    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1979    if (RegPressure[RCId] >= RegLimit[RCId])
1980      return true;
1981  }
1982  return false;
1983}
1984
1985// Compute the register pressure contribution by this instruction by count up
1986// for uses that are not live and down for defs. Only count register classes
1987// that are already under high pressure. As a side effect, compute the number of
1988// uses of registers that are already live.
1989//
1990// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1991// so could probably be factored.
1992int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1993  LiveUses = 0;
1994  int PDiff = 0;
1995  for (const SDep &Pred : SU->Preds) {
1996    if (Pred.isCtrl())
1997      continue;
1998    SUnit *PredSU = Pred.getSUnit();
1999    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2000    // to cover the number of registers defined (they are all live).
2001    if (PredSU->NumRegDefsLeft == 0) {
2002      if (PredSU->getNode()->isMachineOpcode())
2003        ++LiveUses;
2004      continue;
2005    }
2006    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2007         RegDefPos.IsValid(); RegDefPos.Advance()) {
2008      MVT VT = RegDefPos.GetValue();
2009      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2010      if (RegPressure[RCId] >= RegLimit[RCId])
2011        ++PDiff;
2012    }
2013  }
2014  const SDNode *N = SU->getNode();
2015
2016  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2017    return PDiff;
2018
2019  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2020  for (unsigned i = 0; i != NumDefs; ++i) {
2021    MVT VT = N->getSimpleValueType(i);
2022    if (!N->hasAnyUseOfValue(i))
2023      continue;
2024    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2025    if (RegPressure[RCId] >= RegLimit[RCId])
2026      --PDiff;
2027  }
2028  return PDiff;
2029}
2030
2031void RegReductionPQBase::scheduledNode(SUnit *SU) {
2032  if (!TracksRegPressure)
2033    return;
2034
2035  if (!SU->getNode())
2036    return;
2037
2038  for (const SDep &Pred : SU->Preds) {
2039    if (Pred.isCtrl())
2040      continue;
2041    SUnit *PredSU = Pred.getSUnit();
2042    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2043    // to cover the number of registers defined (they are all live).
2044    if (PredSU->NumRegDefsLeft == 0) {
2045      continue;
2046    }
2047    // FIXME: The ScheduleDAG currently loses information about which of a
2048    // node's values is consumed by each dependence. Consequently, if the node
2049    // defines multiple register classes, we don't know which to pressurize
2050    // here. Instead the following loop consumes the register defs in an
2051    // arbitrary order. At least it handles the common case of clustered loads
2052    // to the same class. For precise liveness, each SDep needs to indicate the
2053    // result number. But that tightly couples the ScheduleDAG with the
2054    // SelectionDAG making updates tricky. A simpler hack would be to attach a
2055    // value type or register class to SDep.
2056    //
2057    // The most important aspect of register tracking is balancing the increase
2058    // here with the reduction further below. Note that this SU may use multiple
2059    // defs in PredSU. The can't be determined here, but we've already
2060    // compensated by reducing NumRegDefsLeft in PredSU during
2061    // ScheduleDAGSDNodes::AddSchedEdges.
2062    --PredSU->NumRegDefsLeft;
2063    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2064    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2065         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2066      if (SkipRegDefs)
2067        continue;
2068
2069      unsigned RCId, Cost;
2070      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2071      RegPressure[RCId] += Cost;
2072      break;
2073    }
2074  }
2075
2076  // We should have this assert, but there may be dead SDNodes that never
2077  // materialize as SUnits, so they don't appear to generate liveness.
2078  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2079  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2080  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2081       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2082    if (SkipRegDefs > 0)
2083      continue;
2084    unsigned RCId, Cost;
2085    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2086    if (RegPressure[RCId] < Cost) {
2087      // Register pressure tracking is imprecise. This can happen. But we try
2088      // hard not to let it happen because it likely results in poor scheduling.
2089      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
2090      RegPressure[RCId] = 0;
2091    }
2092    else {
2093      RegPressure[RCId] -= Cost;
2094    }
2095  }
2096  dumpRegPressure();
2097}
2098
2099void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2100  if (!TracksRegPressure)
2101    return;
2102
2103  const SDNode *N = SU->getNode();
2104  if (!N) return;
2105
2106  if (!N->isMachineOpcode()) {
2107    if (N->getOpcode() != ISD::CopyToReg)
2108      return;
2109  } else {
2110    unsigned Opc = N->getMachineOpcode();
2111    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2112        Opc == TargetOpcode::INSERT_SUBREG ||
2113        Opc == TargetOpcode::SUBREG_TO_REG ||
2114        Opc == TargetOpcode::REG_SEQUENCE ||
2115        Opc == TargetOpcode::IMPLICIT_DEF)
2116      return;
2117  }
2118
2119  for (const SDep &Pred : SU->Preds) {
2120    if (Pred.isCtrl())
2121      continue;
2122    SUnit *PredSU = Pred.getSUnit();
2123    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2124    // counts data deps.
2125    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2126      continue;
2127    const SDNode *PN = PredSU->getNode();
2128    if (!PN->isMachineOpcode()) {
2129      if (PN->getOpcode() == ISD::CopyFromReg) {
2130        MVT VT = PN->getSimpleValueType(0);
2131        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2132        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2133      }
2134      continue;
2135    }
2136    unsigned POpc = PN->getMachineOpcode();
2137    if (POpc == TargetOpcode::IMPLICIT_DEF)
2138      continue;
2139    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2140        POpc == TargetOpcode::INSERT_SUBREG ||
2141        POpc == TargetOpcode::SUBREG_TO_REG) {
2142      MVT VT = PN->getSimpleValueType(0);
2143      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2144      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2145      continue;
2146    }
2147    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2148    for (unsigned i = 0; i != NumDefs; ++i) {
2149      MVT VT = PN->getSimpleValueType(i);
2150      if (!PN->hasAnyUseOfValue(i))
2151        continue;
2152      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2153      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2154        // Register pressure tracking is imprecise. This can happen.
2155        RegPressure[RCId] = 0;
2156      else
2157        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2158    }
2159  }
2160
2161  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2162  // may transfer data dependencies to CopyToReg.
2163  if (SU->NumSuccs && N->isMachineOpcode()) {
2164    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2165    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2166      MVT VT = N->getSimpleValueType(i);
2167      if (VT == MVT::Glue || VT == MVT::Other)
2168        continue;
2169      if (!N->hasAnyUseOfValue(i))
2170        continue;
2171      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2172      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2173    }
2174  }
2175
2176  dumpRegPressure();
2177}
2178
2179//===----------------------------------------------------------------------===//
2180//           Dynamic Node Priority for Register Pressure Reduction
2181//===----------------------------------------------------------------------===//
2182
2183/// closestSucc - Returns the scheduled cycle of the successor which is
2184/// closest to the current cycle.
2185static unsigned closestSucc(const SUnit *SU) {
2186  unsigned MaxHeight = 0;
2187  for (const SDep &Succ : SU->Succs) {
2188    if (Succ.isCtrl()) continue;  // ignore chain succs
2189    unsigned Height = Succ.getSUnit()->getHeight();
2190    // If there are bunch of CopyToRegs stacked up, they should be considered
2191    // to be at the same position.
2192    if (Succ.getSUnit()->getNode() &&
2193        Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2194      Height = closestSucc(Succ.getSUnit())+1;
2195    if (Height > MaxHeight)
2196      MaxHeight = Height;
2197  }
2198  return MaxHeight;
2199}
2200
2201/// calcMaxScratches - Returns an cost estimate of the worse case requirement
2202/// for scratch registers, i.e. number of data dependencies.
2203static unsigned calcMaxScratches(const SUnit *SU) {
2204  unsigned Scratches = 0;
2205  for (const SDep &Pred : SU->Preds) {
2206    if (Pred.isCtrl()) continue;  // ignore chain preds
2207    Scratches++;
2208  }
2209  return Scratches;
2210}
2211
2212/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2213/// CopyFromReg from a virtual register.
2214static bool hasOnlyLiveInOpers(const SUnit *SU) {
2215  bool RetVal = false;
2216  for (const SDep &Pred : SU->Preds) {
2217    if (Pred.isCtrl()) continue;
2218    const SUnit *PredSU = Pred.getSUnit();
2219    if (PredSU->getNode() &&
2220        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2221      unsigned Reg =
2222        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2223      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2224        RetVal = true;
2225        continue;
2226      }
2227    }
2228    return false;
2229  }
2230  return RetVal;
2231}
2232
2233/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2234/// CopyToReg to a virtual register. This SU def is probably a liveout and
2235/// it has no other use. It should be scheduled closer to the terminator.
2236static bool hasOnlyLiveOutUses(const SUnit *SU) {
2237  bool RetVal = false;
2238  for (const SDep &Succ : SU->Succs) {
2239    if (Succ.isCtrl()) continue;
2240    const SUnit *SuccSU = Succ.getSUnit();
2241    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2242      unsigned Reg =
2243        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2244      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2245        RetVal = true;
2246        continue;
2247      }
2248    }
2249    return false;
2250  }
2251  return RetVal;
2252}
2253
2254// Set isVRegCycle for a node with only live in opers and live out uses. Also
2255// set isVRegCycle for its CopyFromReg operands.
2256//
2257// This is only relevant for single-block loops, in which case the VRegCycle
2258// node is likely an induction variable in which the operand and target virtual
2259// registers should be coalesced (e.g. pre/post increment values). Setting the
2260// isVRegCycle flag helps the scheduler prioritize other uses of the same
2261// CopyFromReg so that this node becomes the virtual register "kill". This
2262// avoids interference between the values live in and out of the block and
2263// eliminates a copy inside the loop.
2264static void initVRegCycle(SUnit *SU) {
2265  if (DisableSchedVRegCycle)
2266    return;
2267
2268  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2269    return;
2270
2271  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2272
2273  SU->isVRegCycle = true;
2274
2275  for (const SDep &Pred : SU->Preds) {
2276    if (Pred.isCtrl()) continue;
2277    Pred.getSUnit()->isVRegCycle = true;
2278  }
2279}
2280
2281// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2282// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2283static void resetVRegCycle(SUnit *SU) {
2284  if (!SU->isVRegCycle)
2285    return;
2286
2287  for (const SDep &Pred : SU->Preds) {
2288    if (Pred.isCtrl()) continue;  // ignore chain preds
2289    SUnit *PredSU = Pred.getSUnit();
2290    if (PredSU->isVRegCycle) {
2291      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2292             "VRegCycle def must be CopyFromReg");
2293      Pred.getSUnit()->isVRegCycle = false;
2294    }
2295  }
2296}
2297
2298// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2299// means a node that defines the VRegCycle has not been scheduled yet.
2300static bool hasVRegCycleUse(const SUnit *SU) {
2301  // If this SU also defines the VReg, don't hoist it as a "use".
2302  if (SU->isVRegCycle)
2303    return false;
2304
2305  for (const SDep &Pred : SU->Preds) {
2306    if (Pred.isCtrl()) continue;  // ignore chain preds
2307    if (Pred.getSUnit()->isVRegCycle &&
2308        Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2309      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2310      return true;
2311    }
2312  }
2313  return false;
2314}
2315
2316// Check for either a dependence (latency) or resource (hazard) stall.
2317//
2318// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2319static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2320  if ((int)SPQ->getCurCycle() < Height) return true;
2321  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2322      != ScheduleHazardRecognizer::NoHazard)
2323    return true;
2324  return false;
2325}
2326
2327// Return -1 if left has higher priority, 1 if right has higher priority.
2328// Return 0 if latency-based priority is equivalent.
2329static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2330                            RegReductionPQBase *SPQ) {
2331  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2332  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2333  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2334  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2335  int LHeight = (int)left->getHeight() + LPenalty;
2336  int RHeight = (int)right->getHeight() + RPenalty;
2337
2338  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2339    BUHasStall(left, LHeight, SPQ);
2340  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2341    BUHasStall(right, RHeight, SPQ);
2342
2343  // If scheduling one of the node will cause a pipeline stall, delay it.
2344  // If scheduling either one of the node will cause a pipeline stall, sort
2345  // them according to their height.
2346  if (LStall) {
2347    if (!RStall)
2348      return 1;
2349    if (LHeight != RHeight)
2350      return LHeight > RHeight ? 1 : -1;
2351  } else if (RStall)
2352    return -1;
2353
2354  // If either node is scheduling for latency, sort them by height/depth
2355  // and latency.
2356  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2357                     right->SchedulingPref == Sched::ILP)) {
2358    // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
2359    // is enabled, grouping instructions by cycle, then its height is already
2360    // covered so only its depth matters. We also reach this point if both stall
2361    // but have the same height.
2362    if (!SPQ->getHazardRec()->isEnabled()) {
2363      if (LHeight != RHeight)
2364        return LHeight > RHeight ? 1 : -1;
2365    }
2366    int LDepth = left->getDepth() - LPenalty;
2367    int RDepth = right->getDepth() - RPenalty;
2368    if (LDepth != RDepth) {
2369      DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2370            << ") depth " << LDepth << " vs SU (" << right->NodeNum
2371            << ") depth " << RDepth << "\n");
2372      return LDepth < RDepth ? 1 : -1;
2373    }
2374    if (left->Latency != right->Latency)
2375      return left->Latency > right->Latency ? 1 : -1;
2376  }
2377  return 0;
2378}
2379
2380static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2381  // Schedule physical register definitions close to their use. This is
2382  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2383  // long as shortening physreg live ranges is generally good, we can defer
2384  // creating a subtarget hook.
2385  if (!DisableSchedPhysRegJoin) {
2386    bool LHasPhysReg = left->hasPhysRegDefs;
2387    bool RHasPhysReg = right->hasPhysRegDefs;
2388    if (LHasPhysReg != RHasPhysReg) {
2389      #ifndef NDEBUG
2390      static const char *const PhysRegMsg[] = { " has no physreg",
2391                                                " defines a physreg" };
2392      #endif
2393      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2394            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2395            << PhysRegMsg[RHasPhysReg] << "\n");
2396      return LHasPhysReg < RHasPhysReg;
2397    }
2398  }
2399
2400  // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
2401  unsigned LPriority = SPQ->getNodePriority(left);
2402  unsigned RPriority = SPQ->getNodePriority(right);
2403
2404  // Be really careful about hoisting call operands above previous calls.
2405  // Only allows it if it would reduce register pressure.
2406  if (left->isCall && right->isCallOp) {
2407    unsigned RNumVals = right->getNode()->getNumValues();
2408    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2409  }
2410  if (right->isCall && left->isCallOp) {
2411    unsigned LNumVals = left->getNode()->getNumValues();
2412    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2413  }
2414
2415  if (LPriority != RPriority)
2416    return LPriority > RPriority;
2417
2418  // One or both of the nodes are calls and their sethi-ullman numbers are the
2419  // same, then keep source order.
2420  if (left->isCall || right->isCall) {
2421    unsigned LOrder = SPQ->getNodeOrdering(left);
2422    unsigned ROrder = SPQ->getNodeOrdering(right);
2423
2424    // Prefer an ordering where the lower the non-zero order number, the higher
2425    // the preference.
2426    if ((LOrder || ROrder) && LOrder != ROrder)
2427      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2428  }
2429
2430  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2431  // e.g.
2432  // t1 = op t2, c1
2433  // t3 = op t4, c2
2434  //
2435  // and the following instructions are both ready.
2436  // t2 = op c3
2437  // t4 = op c4
2438  //
2439  // Then schedule t2 = op first.
2440  // i.e.
2441  // t4 = op c4
2442  // t2 = op c3
2443  // t1 = op t2, c1
2444  // t3 = op t4, c2
2445  //
2446  // This creates more short live intervals.
2447  unsigned LDist = closestSucc(left);
2448  unsigned RDist = closestSucc(right);
2449  if (LDist != RDist)
2450    return LDist < RDist;
2451
2452  // How many registers becomes live when the node is scheduled.
2453  unsigned LScratch = calcMaxScratches(left);
2454  unsigned RScratch = calcMaxScratches(right);
2455  if (LScratch != RScratch)
2456    return LScratch > RScratch;
2457
2458  // Comparing latency against a call makes little sense unless the node
2459  // is register pressure-neutral.
2460  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2461    return (left->NodeQueueId > right->NodeQueueId);
2462
2463  // Do not compare latencies when one or both of the nodes are calls.
2464  if (!DisableSchedCycles &&
2465      !(left->isCall || right->isCall)) {
2466    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2467    if (result != 0)
2468      return result > 0;
2469  }
2470  else {
2471    if (left->getHeight() != right->getHeight())
2472      return left->getHeight() > right->getHeight();
2473
2474    if (left->getDepth() != right->getDepth())
2475      return left->getDepth() < right->getDepth();
2476  }
2477
2478  assert(left->NodeQueueId && right->NodeQueueId &&
2479         "NodeQueueId cannot be zero");
2480  return (left->NodeQueueId > right->NodeQueueId);
2481}
2482
2483// Bottom up
2484bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2485  if (int res = checkSpecialNodes(left, right))
2486    return res > 0;
2487
2488  return BURRSort(left, right, SPQ);
2489}
2490
2491// Source order, otherwise bottom up.
2492bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2493  if (int res = checkSpecialNodes(left, right))
2494    return res > 0;
2495
2496  unsigned LOrder = SPQ->getNodeOrdering(left);
2497  unsigned ROrder = SPQ->getNodeOrdering(right);
2498
2499  // Prefer an ordering where the lower the non-zero order number, the higher
2500  // the preference.
2501  if ((LOrder || ROrder) && LOrder != ROrder)
2502    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2503
2504  return BURRSort(left, right, SPQ);
2505}
2506
2507// If the time between now and when the instruction will be ready can cover
2508// the spill code, then avoid adding it to the ready queue. This gives long
2509// stalls highest priority and allows hoisting across calls. It should also
2510// speed up processing the available queue.
2511bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2512  static const unsigned ReadyDelay = 3;
2513
2514  if (SPQ->MayReduceRegPressure(SU)) return true;
2515
2516  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2517
2518  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2519      != ScheduleHazardRecognizer::NoHazard)
2520    return false;
2521
2522  return true;
2523}
2524
2525// Return true if right should be scheduled with higher priority than left.
2526bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2527  if (int res = checkSpecialNodes(left, right))
2528    return res > 0;
2529
2530  if (left->isCall || right->isCall)
2531    // No way to compute latency of calls.
2532    return BURRSort(left, right, SPQ);
2533
2534  bool LHigh = SPQ->HighRegPressure(left);
2535  bool RHigh = SPQ->HighRegPressure(right);
2536  // Avoid causing spills. If register pressure is high, schedule for
2537  // register pressure reduction.
2538  if (LHigh && !RHigh) {
2539    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2540          << right->NodeNum << ")\n");
2541    return true;
2542  }
2543  else if (!LHigh && RHigh) {
2544    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2545          << left->NodeNum << ")\n");
2546    return false;
2547  }
2548  if (!LHigh && !RHigh) {
2549    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2550    if (result != 0)
2551      return result > 0;
2552  }
2553  return BURRSort(left, right, SPQ);
2554}
2555
2556// Schedule as many instructions in each cycle as possible. So don't make an
2557// instruction available unless it is ready in the current cycle.
2558bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2559  if (SU->getHeight() > CurCycle) return false;
2560
2561  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2562      != ScheduleHazardRecognizer::NoHazard)
2563    return false;
2564
2565  return true;
2566}
2567
2568static bool canEnableCoalescing(SUnit *SU) {
2569  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2570  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2571    // CopyToReg should be close to its uses to facilitate coalescing and
2572    // avoid spilling.
2573    return true;
2574
2575  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2576      Opc == TargetOpcode::SUBREG_TO_REG ||
2577      Opc == TargetOpcode::INSERT_SUBREG)
2578    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2579    // close to their uses to facilitate coalescing.
2580    return true;
2581
2582  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2583    // If SU does not have a register def, schedule it close to its uses
2584    // because it does not lengthen any live ranges.
2585    return true;
2586
2587  return false;
2588}
2589
2590// list-ilp is currently an experimental scheduler that allows various
2591// heuristics to be enabled prior to the normal register reduction logic.
2592bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2593  if (int res = checkSpecialNodes(left, right))
2594    return res > 0;
2595
2596  if (left->isCall || right->isCall)
2597    // No way to compute latency of calls.
2598    return BURRSort(left, right, SPQ);
2599
2600  unsigned LLiveUses = 0, RLiveUses = 0;
2601  int LPDiff = 0, RPDiff = 0;
2602  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2603    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2604    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2605  }
2606  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2607    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2608          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2609    return LPDiff > RPDiff;
2610  }
2611
2612  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2613    bool LReduce = canEnableCoalescing(left);
2614    bool RReduce = canEnableCoalescing(right);
2615    if (LReduce && !RReduce) return false;
2616    if (RReduce && !LReduce) return true;
2617  }
2618
2619  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2620    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2621          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2622    return LLiveUses < RLiveUses;
2623  }
2624
2625  if (!DisableSchedStalls) {
2626    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2627    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2628    if (LStall != RStall)
2629      return left->getHeight() > right->getHeight();
2630  }
2631
2632  if (!DisableSchedCriticalPath) {
2633    int spread = (int)left->getDepth() - (int)right->getDepth();
2634    if (std::abs(spread) > MaxReorderWindow) {
2635      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2636            << left->getDepth() << " != SU(" << right->NodeNum << "): "
2637            << right->getDepth() << "\n");
2638      return left->getDepth() < right->getDepth();
2639    }
2640  }
2641
2642  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2643    int spread = (int)left->getHeight() - (int)right->getHeight();
2644    if (std::abs(spread) > MaxReorderWindow)
2645      return left->getHeight() > right->getHeight();
2646  }
2647
2648  return BURRSort(left, right, SPQ);
2649}
2650
2651void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2652  SUnits = &sunits;
2653  // Add pseudo dependency edges for two-address nodes.
2654  if (!Disable2AddrHack)
2655    AddPseudoTwoAddrDeps();
2656  // Reroute edges to nodes with multiple uses.
2657  if (!TracksRegPressure && !SrcOrder)
2658    PrescheduleNodesWithMultipleUses();
2659  // Calculate node priorities.
2660  CalculateSethiUllmanNumbers();
2661
2662  // For single block loops, mark nodes that look like canonical IV increments.
2663  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2664    for (SUnit &SU : sunits)
2665      initVRegCycle(&SU);
2666}
2667
2668//===----------------------------------------------------------------------===//
2669//                    Preschedule for Register Pressure
2670//===----------------------------------------------------------------------===//
2671
2672bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2673  if (SU->isTwoAddress) {
2674    unsigned Opc = SU->getNode()->getMachineOpcode();
2675    const MCInstrDesc &MCID = TII->get(Opc);
2676    unsigned NumRes = MCID.getNumDefs();
2677    unsigned NumOps = MCID.getNumOperands() - NumRes;
2678    for (unsigned i = 0; i != NumOps; ++i) {
2679      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2680        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2681        if (DU->getNodeId() != -1 &&
2682            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2683          return true;
2684      }
2685    }
2686  }
2687  return false;
2688}
2689
2690/// canClobberReachingPhysRegUse - True if SU would clobber one of it's
2691/// successor's explicit physregs whose definition can reach DepSU.
2692/// i.e. DepSU should not be scheduled above SU.
2693static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2694                                         ScheduleDAGRRList *scheduleDAG,
2695                                         const TargetInstrInfo *TII,
2696                                         const TargetRegisterInfo *TRI) {
2697  const MCPhysReg *ImpDefs
2698    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2699  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2700  if(!ImpDefs && !RegMask)
2701    return false;
2702
2703  for (const SDep &Succ : SU->Succs) {
2704    SUnit *SuccSU = Succ.getSUnit();
2705    for (const SDep &SuccPred : SuccSU->Preds) {
2706      if (!SuccPred.isAssignedRegDep())
2707        continue;
2708
2709      if (RegMask &&
2710          MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2711          scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2712        return true;
2713
2714      if (ImpDefs)
2715        for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2716          // Return true if SU clobbers this physical register use and the
2717          // definition of the register reaches from DepSU. IsReachable queries
2718          // a topological forward sort of the DAG (following the successors).
2719          if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2720              scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2721            return true;
2722    }
2723  }
2724  return false;
2725}
2726
2727/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2728/// physical register defs.
2729static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2730                                  const TargetInstrInfo *TII,
2731                                  const TargetRegisterInfo *TRI) {
2732  SDNode *N = SuccSU->getNode();
2733  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2734  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2735  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2736  for (const SDNode *SUNode = SU->getNode(); SUNode;
2737       SUNode = SUNode->getGluedNode()) {
2738    if (!SUNode->isMachineOpcode())
2739      continue;
2740    const MCPhysReg *SUImpDefs =
2741      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2742    const uint32_t *SURegMask = getNodeRegMask(SUNode);
2743    if (!SUImpDefs && !SURegMask)
2744      continue;
2745    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2746      MVT VT = N->getSimpleValueType(i);
2747      if (VT == MVT::Glue || VT == MVT::Other)
2748        continue;
2749      if (!N->hasAnyUseOfValue(i))
2750        continue;
2751      unsigned Reg = ImpDefs[i - NumDefs];
2752      if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2753        return true;
2754      if (!SUImpDefs)
2755        continue;
2756      for (;*SUImpDefs; ++SUImpDefs) {
2757        unsigned SUReg = *SUImpDefs;
2758        if (TRI->regsOverlap(Reg, SUReg))
2759          return true;
2760      }
2761    }
2762  }
2763  return false;
2764}
2765
2766/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2767/// are not handled well by the general register pressure reduction
2768/// heuristics. When presented with code like this:
2769///
2770///      N
2771///    / |
2772///   /  |
2773///  U  store
2774///  |
2775/// ...
2776///
2777/// the heuristics tend to push the store up, but since the
2778/// operand of the store has another use (U), this would increase
2779/// the length of that other use (the U->N edge).
2780///
2781/// This function transforms code like the above to route U's
2782/// dependence through the store when possible, like this:
2783///
2784///      N
2785///      ||
2786///      ||
2787///     store
2788///       |
2789///       U
2790///       |
2791///      ...
2792///
2793/// This results in the store being scheduled immediately
2794/// after N, which shortens the U->N live range, reducing
2795/// register pressure.
2796///
2797void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2798  // Visit all the nodes in topological order, working top-down.
2799  for (SUnit &SU : *SUnits) {
2800    // For now, only look at nodes with no data successors, such as stores.
2801    // These are especially important, due to the heuristics in
2802    // getNodePriority for nodes with no data successors.
2803    if (SU.NumSuccs != 0)
2804      continue;
2805    // For now, only look at nodes with exactly one data predecessor.
2806    if (SU.NumPreds != 1)
2807      continue;
2808    // Avoid prescheduling copies to virtual registers, which don't behave
2809    // like other nodes from the perspective of scheduling heuristics.
2810    if (SDNode *N = SU.getNode())
2811      if (N->getOpcode() == ISD::CopyToReg &&
2812          TargetRegisterInfo::isVirtualRegister
2813            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2814        continue;
2815
2816    // Locate the single data predecessor.
2817    SUnit *PredSU = nullptr;
2818    for (const SDep &Pred : SU.Preds)
2819      if (!Pred.isCtrl()) {
2820        PredSU = Pred.getSUnit();
2821        break;
2822      }
2823    assert(PredSU);
2824
2825    // Don't rewrite edges that carry physregs, because that requires additional
2826    // support infrastructure.
2827    if (PredSU->hasPhysRegDefs)
2828      continue;
2829    // Short-circuit the case where SU is PredSU's only data successor.
2830    if (PredSU->NumSuccs == 1)
2831      continue;
2832    // Avoid prescheduling to copies from virtual registers, which don't behave
2833    // like other nodes from the perspective of scheduling heuristics.
2834    if (SDNode *N = SU.getNode())
2835      if (N->getOpcode() == ISD::CopyFromReg &&
2836          TargetRegisterInfo::isVirtualRegister
2837            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2838        continue;
2839
2840    // Perform checks on the successors of PredSU.
2841    for (const SDep &PredSucc : PredSU->Succs) {
2842      SUnit *PredSuccSU = PredSucc.getSUnit();
2843      if (PredSuccSU == &SU) continue;
2844      // If PredSU has another successor with no data successors, for
2845      // now don't attempt to choose either over the other.
2846      if (PredSuccSU->NumSuccs == 0)
2847        goto outer_loop_continue;
2848      // Don't break physical register dependencies.
2849      if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2850        if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
2851          goto outer_loop_continue;
2852      // Don't introduce graph cycles.
2853      if (scheduleDAG->IsReachable(&SU, PredSuccSU))
2854        goto outer_loop_continue;
2855    }
2856
2857    // Ok, the transformation is safe and the heuristics suggest it is
2858    // profitable. Update the graph.
2859    DEBUG(dbgs() << "    Prescheduling SU #" << SU.NodeNum
2860                 << " next to PredSU #" << PredSU->NodeNum
2861                 << " to guide scheduling in the presence of multiple uses\n");
2862    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2863      SDep Edge = PredSU->Succs[i];
2864      assert(!Edge.isAssignedRegDep());
2865      SUnit *SuccSU = Edge.getSUnit();
2866      if (SuccSU != &SU) {
2867        Edge.setSUnit(PredSU);
2868        scheduleDAG->RemovePred(SuccSU, Edge);
2869        scheduleDAG->AddPred(&SU, Edge);
2870        Edge.setSUnit(&SU);
2871        scheduleDAG->AddPred(SuccSU, Edge);
2872        --i;
2873      }
2874    }
2875  outer_loop_continue:;
2876  }
2877}
2878
2879/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2880/// it as a def&use operand. Add a pseudo control edge from it to the other
2881/// node (if it won't create a cycle) so the two-address one will be scheduled
2882/// first (lower in the schedule). If both nodes are two-address, favor the
2883/// one that has a CopyToReg use (more likely to be a loop induction update).
2884/// If both are two-address, but one is commutable while the other is not
2885/// commutable, favor the one that's not commutable.
2886void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2887  for (SUnit &SU : *SUnits) {
2888    if (!SU.isTwoAddress)
2889      continue;
2890
2891    SDNode *Node = SU.getNode();
2892    if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
2893      continue;
2894
2895    bool isLiveOut = hasOnlyLiveOutUses(&SU);
2896    unsigned Opc = Node->getMachineOpcode();
2897    const MCInstrDesc &MCID = TII->get(Opc);
2898    unsigned NumRes = MCID.getNumDefs();
2899    unsigned NumOps = MCID.getNumOperands() - NumRes;
2900    for (unsigned j = 0; j != NumOps; ++j) {
2901      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
2902        continue;
2903      SDNode *DU = SU.getNode()->getOperand(j).getNode();
2904      if (DU->getNodeId() == -1)
2905        continue;
2906      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2907      if (!DUSU)
2908        continue;
2909      for (const SDep &Succ : DUSU->Succs) {
2910        if (Succ.isCtrl())
2911          continue;
2912        SUnit *SuccSU = Succ.getSUnit();
2913        if (SuccSU == &SU)
2914          continue;
2915        // Be conservative. Ignore if nodes aren't at roughly the same
2916        // depth and height.
2917        if (SuccSU->getHeight() < SU.getHeight() &&
2918            (SU.getHeight() - SuccSU->getHeight()) > 1)
2919          continue;
2920        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2921        // constrains whatever is using the copy, instead of the copy
2922        // itself. In the case that the copy is coalesced, this
2923        // preserves the intent of the pseudo two-address heurietics.
2924        while (SuccSU->Succs.size() == 1 &&
2925               SuccSU->getNode()->isMachineOpcode() &&
2926               SuccSU->getNode()->getMachineOpcode() ==
2927                 TargetOpcode::COPY_TO_REGCLASS)
2928          SuccSU = SuccSU->Succs.front().getSUnit();
2929        // Don't constrain non-instruction nodes.
2930        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2931          continue;
2932        // Don't constrain nodes with physical register defs if the
2933        // predecessor can clobber them.
2934        if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
2935          if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
2936            continue;
2937        }
2938        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2939        // these may be coalesced away. We want them close to their uses.
2940        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2941        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2942            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2943            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2944          continue;
2945        if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
2946            (!canClobber(SuccSU, DUSU) ||
2947             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2948             (!SU.isCommutable && SuccSU->isCommutable)) &&
2949            !scheduleDAG->IsReachable(SuccSU, &SU)) {
2950          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2951                       << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2952          scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
2953        }
2954      }
2955    }
2956  }
2957}
2958
2959//===----------------------------------------------------------------------===//
2960//                         Public Constructor Functions
2961//===----------------------------------------------------------------------===//
2962
2963llvm::ScheduleDAGSDNodes *
2964llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2965                                 CodeGenOpt::Level OptLevel) {
2966  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
2967  const TargetInstrInfo *TII = STI.getInstrInfo();
2968  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
2969
2970  BURegReductionPriorityQueue *PQ =
2971    new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
2972  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2973  PQ->setScheduleDAG(SD);
2974  return SD;
2975}
2976
2977llvm::ScheduleDAGSDNodes *
2978llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2979                                   CodeGenOpt::Level OptLevel) {
2980  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
2981  const TargetInstrInfo *TII = STI.getInstrInfo();
2982  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
2983
2984  SrcRegReductionPriorityQueue *PQ =
2985    new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
2986  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2987  PQ->setScheduleDAG(SD);
2988  return SD;
2989}
2990
2991llvm::ScheduleDAGSDNodes *
2992llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2993                                   CodeGenOpt::Level OptLevel) {
2994  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
2995  const TargetInstrInfo *TII = STI.getInstrInfo();
2996  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
2997  const TargetLowering *TLI = IS->TLI;
2998
2999  HybridBURRPriorityQueue *PQ =
3000    new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3001
3002  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3003  PQ->setScheduleDAG(SD);
3004  return SD;
3005}
3006
3007llvm::ScheduleDAGSDNodes *
3008llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3009                                CodeGenOpt::Level OptLevel) {
3010  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3011  const TargetInstrInfo *TII = STI.getInstrInfo();
3012  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3013  const TargetLowering *TLI = IS->TLI;
3014
3015  ILPBURRPriorityQueue *PQ =
3016    new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3017  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3018  PQ->setScheduleDAG(SD);
3019  return SD;
3020}
3021