BlockFrequencyInfoImpl.cpp revision cd81d94322a39503e4a3e87b6ee03d4fcb3465fb
1//===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Loops should be simplified before this analysis.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
15#include "llvm/ADT/SCCIterator.h"
16#include "llvm/Support/raw_ostream.h"
17#include <deque>
18
19using namespace llvm;
20using namespace llvm::bfi_detail;
21
22#define DEBUG_TYPE "block-freq"
23
24//===----------------------------------------------------------------------===//
25//
26// BlockMass implementation.
27//
28//===----------------------------------------------------------------------===//
29ScaledNumber<uint64_t> BlockMass::toScaled() const {
30  if (isFull())
31    return ScaledNumber<uint64_t>(1, 0);
32  return ScaledNumber<uint64_t>(getMass() + 1, -64);
33}
34
35void BlockMass::dump() const { print(dbgs()); }
36
37static char getHexDigit(int N) {
38  assert(N < 16);
39  if (N < 10)
40    return '0' + N;
41  return 'a' + N - 10;
42}
43raw_ostream &BlockMass::print(raw_ostream &OS) const {
44  for (int Digits = 0; Digits < 16; ++Digits)
45    OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
46  return OS;
47}
48
49//===----------------------------------------------------------------------===//
50//
51// BlockFrequencyInfoImpl implementation.
52//
53//===----------------------------------------------------------------------===//
54namespace {
55
56typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
57typedef BlockFrequencyInfoImplBase::Distribution Distribution;
58typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
59typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64;
60typedef BlockFrequencyInfoImplBase::LoopData LoopData;
61typedef BlockFrequencyInfoImplBase::Weight Weight;
62typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
63
64/// \brief Dithering mass distributer.
65///
66/// This class splits up a single mass into portions by weight, dithering to
67/// spread out error.  No mass is lost.  The dithering precision depends on the
68/// precision of the product of \a BlockMass and \a BranchProbability.
69///
70/// The distribution algorithm follows.
71///
72///  1. Initialize by saving the sum of the weights in \a RemWeight and the
73///     mass to distribute in \a RemMass.
74///
75///  2. For each portion:
76///
77///      1. Construct a branch probability, P, as the portion's weight divided
78///         by the current value of \a RemWeight.
79///      2. Calculate the portion's mass as \a RemMass times P.
80///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
81///         the current portion's weight and mass.
82struct DitheringDistributer {
83  uint32_t RemWeight;
84  BlockMass RemMass;
85
86  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
87
88  BlockMass takeMass(uint32_t Weight);
89};
90}
91
92DitheringDistributer::DitheringDistributer(Distribution &Dist,
93                                           const BlockMass &Mass) {
94  Dist.normalize();
95  RemWeight = Dist.Total;
96  RemMass = Mass;
97}
98
99BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
100  assert(Weight && "invalid weight");
101  assert(Weight <= RemWeight);
102  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
103
104  // Decrement totals (dither).
105  RemWeight -= Weight;
106  RemMass -= Mass;
107  return Mass;
108}
109
110void Distribution::add(const BlockNode &Node, uint64_t Amount,
111                       Weight::DistType Type) {
112  assert(Amount && "invalid weight of 0");
113  uint64_t NewTotal = Total + Amount;
114
115  // Check for overflow.  It should be impossible to overflow twice.
116  bool IsOverflow = NewTotal < Total;
117  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
118  DidOverflow |= IsOverflow;
119
120  // Update the total.
121  Total = NewTotal;
122
123  // Save the weight.
124  Weight W;
125  W.TargetNode = Node;
126  W.Amount = Amount;
127  W.Type = Type;
128  Weights.push_back(W);
129}
130
131static void combineWeight(Weight &W, const Weight &OtherW) {
132  assert(OtherW.TargetNode.isValid());
133  if (!W.Amount) {
134    W = OtherW;
135    return;
136  }
137  assert(W.Type == OtherW.Type);
138  assert(W.TargetNode == OtherW.TargetNode);
139  assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
140  W.Amount += OtherW.Amount;
141}
142static void combineWeightsBySorting(WeightList &Weights) {
143  // Sort so edges to the same node are adjacent.
144  std::sort(Weights.begin(), Weights.end(),
145            [](const Weight &L,
146               const Weight &R) { return L.TargetNode < R.TargetNode; });
147
148  // Combine adjacent edges.
149  WeightList::iterator O = Weights.begin();
150  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
151       ++O, (I = L)) {
152    *O = *I;
153
154    // Find the adjacent weights to the same node.
155    for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
156      combineWeight(*O, *L);
157  }
158
159  // Erase extra entries.
160  Weights.erase(O, Weights.end());
161  return;
162}
163static void combineWeightsByHashing(WeightList &Weights) {
164  // Collect weights into a DenseMap.
165  typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
166  HashTable Combined(NextPowerOf2(2 * Weights.size()));
167  for (const Weight &W : Weights)
168    combineWeight(Combined[W.TargetNode.Index], W);
169
170  // Check whether anything changed.
171  if (Weights.size() == Combined.size())
172    return;
173
174  // Fill in the new weights.
175  Weights.clear();
176  Weights.reserve(Combined.size());
177  for (const auto &I : Combined)
178    Weights.push_back(I.second);
179}
180static void combineWeights(WeightList &Weights) {
181  // Use a hash table for many successors to keep this linear.
182  if (Weights.size() > 128) {
183    combineWeightsByHashing(Weights);
184    return;
185  }
186
187  combineWeightsBySorting(Weights);
188}
189static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
190  assert(Shift >= 0);
191  assert(Shift < 64);
192  if (!Shift)
193    return N;
194  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
195}
196void Distribution::normalize() {
197  // Early exit for termination nodes.
198  if (Weights.empty())
199    return;
200
201  // Only bother if there are multiple successors.
202  if (Weights.size() > 1)
203    combineWeights(Weights);
204
205  // Early exit when combined into a single successor.
206  if (Weights.size() == 1) {
207    Total = 1;
208    Weights.front().Amount = 1;
209    return;
210  }
211
212  // Determine how much to shift right so that the total fits into 32-bits.
213  //
214  // If we shift at all, shift by 1 extra.  Otherwise, the lower limit of 1
215  // for each weight can cause a 32-bit overflow.
216  int Shift = 0;
217  if (DidOverflow)
218    Shift = 33;
219  else if (Total > UINT32_MAX)
220    Shift = 33 - countLeadingZeros(Total);
221
222  // Early exit if nothing needs to be scaled.
223  if (!Shift)
224    return;
225
226  // Recompute the total through accumulation (rather than shifting it) so that
227  // it's accurate after shifting.
228  Total = 0;
229
230  // Sum the weights to each node and shift right if necessary.
231  for (Weight &W : Weights) {
232    // Scale down below UINT32_MAX.  Since Shift is larger than necessary, we
233    // can round here without concern about overflow.
234    assert(W.TargetNode.isValid());
235    W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
236    assert(W.Amount <= UINT32_MAX);
237
238    // Update the total.
239    Total += W.Amount;
240  }
241  assert(Total <= UINT32_MAX);
242}
243
244void BlockFrequencyInfoImplBase::clear() {
245  // Swap with a default-constructed std::vector, since std::vector<>::clear()
246  // does not actually clear heap storage.
247  std::vector<FrequencyData>().swap(Freqs);
248  std::vector<WorkingData>().swap(Working);
249  Loops.clear();
250}
251
252/// \brief Clear all memory not needed downstream.
253///
254/// Releases all memory not used downstream.  In particular, saves Freqs.
255static void cleanup(BlockFrequencyInfoImplBase &BFI) {
256  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
257  BFI.clear();
258  BFI.Freqs = std::move(SavedFreqs);
259}
260
261bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
262                                           const LoopData *OuterLoop,
263                                           const BlockNode &Pred,
264                                           const BlockNode &Succ,
265                                           uint64_t Weight) {
266  if (!Weight)
267    Weight = 1;
268
269  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
270    return OuterLoop && OuterLoop->isHeader(Node);
271  };
272
273  BlockNode Resolved = Working[Succ.Index].getResolvedNode();
274
275#ifndef NDEBUG
276  auto debugSuccessor = [&](const char *Type) {
277    dbgs() << "  =>"
278           << " [" << Type << "] weight = " << Weight;
279    if (!isLoopHeader(Resolved))
280      dbgs() << ", succ = " << getBlockName(Succ);
281    if (Resolved != Succ)
282      dbgs() << ", resolved = " << getBlockName(Resolved);
283    dbgs() << "\n";
284  };
285  (void)debugSuccessor;
286#endif
287
288  if (isLoopHeader(Resolved)) {
289    DEBUG(debugSuccessor("backedge"));
290    Dist.addBackedge(OuterLoop->getHeader(), Weight);
291    return true;
292  }
293
294  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
295    DEBUG(debugSuccessor("  exit  "));
296    Dist.addExit(Resolved, Weight);
297    return true;
298  }
299
300  if (Resolved < Pred) {
301    if (!isLoopHeader(Pred)) {
302      // If OuterLoop is an irreducible loop, we can't actually handle this.
303      assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
304             "unhandled irreducible control flow");
305
306      // Irreducible backedge.  Abort.
307      DEBUG(debugSuccessor("abort!!!"));
308      return false;
309    }
310
311    // If "Pred" is a loop header, then this isn't really a backedge; rather,
312    // OuterLoop must be irreducible.  These false backedges can come only from
313    // secondary loop headers.
314    assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
315           "unhandled irreducible control flow");
316  }
317
318  DEBUG(debugSuccessor(" local  "));
319  Dist.addLocal(Resolved, Weight);
320  return true;
321}
322
323bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
324    const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
325  // Copy the exit map into Dist.
326  for (const auto &I : Loop.Exits)
327    if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
328                   I.second.getMass()))
329      // Irreducible backedge.
330      return false;
331
332  return true;
333}
334
335/// \brief Get the maximum allowed loop scale.
336///
337/// Gives the maximum number of estimated iterations allowed for a loop.  Very
338/// large numbers cause problems downstream (even within 64-bits).
339static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); }
340
341/// \brief Compute the loop scale for a loop.
342void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
343  // Compute loop scale.
344  DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
345
346  // LoopScale == 1 / ExitMass
347  // ExitMass == HeadMass - BackedgeMass
348  BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
349
350  // Block scale stores the inverse of the scale.
351  Loop.Scale = ExitMass.toScaled().inverse();
352
353  DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
354               << " - " << Loop.BackedgeMass << ")\n"
355               << " - scale = " << Loop.Scale << "\n");
356
357  if (Loop.Scale > getMaxLoopScale()) {
358    Loop.Scale = getMaxLoopScale();
359    DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
360  }
361}
362
363/// \brief Package up a loop.
364void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
365  DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
366
367  // Clear the subloop exits to prevent quadratic memory usage.
368  for (const BlockNode &M : Loop.Nodes) {
369    if (auto *Loop = Working[M.Index].getPackagedLoop())
370      Loop->Exits.clear();
371    DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
372  }
373  Loop.IsPackaged = true;
374}
375
376void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
377                                                LoopData *OuterLoop,
378                                                Distribution &Dist) {
379  BlockMass Mass = Working[Source.Index].getMass();
380  DEBUG(dbgs() << "  => mass:  " << Mass << "\n");
381
382  // Distribute mass to successors as laid out in Dist.
383  DitheringDistributer D(Dist, Mass);
384
385#ifndef NDEBUG
386  auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
387                         const char *Desc) {
388    dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
389    if (Desc)
390      dbgs() << " [" << Desc << "]";
391    if (T.isValid())
392      dbgs() << " to " << getBlockName(T);
393    dbgs() << "\n";
394  };
395  (void)debugAssign;
396#endif
397
398  for (const Weight &W : Dist.Weights) {
399    // Check for a local edge (non-backedge and non-exit).
400    BlockMass Taken = D.takeMass(W.Amount);
401    if (W.Type == Weight::Local) {
402      Working[W.TargetNode.Index].getMass() += Taken;
403      DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
404      continue;
405    }
406
407    // Backedges and exits only make sense if we're processing a loop.
408    assert(OuterLoop && "backedge or exit outside of loop");
409
410    // Check for a backedge.
411    if (W.Type == Weight::Backedge) {
412      OuterLoop->BackedgeMass += Taken;
413      DEBUG(debugAssign(BlockNode(), Taken, "back"));
414      continue;
415    }
416
417    // This must be an exit.
418    assert(W.Type == Weight::Exit);
419    OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
420    DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
421  }
422}
423
424static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
425                                     const Scaled64 &Min, const Scaled64 &Max) {
426  // Scale the Factor to a size that creates integers.  Ideally, integers would
427  // be scaled so that Max == UINT64_MAX so that they can be best
428  // differentiated.  However, the register allocator currently deals poorly
429  // with large numbers.  Instead, push Min up a little from 1 to give some
430  // room to differentiate small, unequal numbers.
431  //
432  // TODO: fix issues downstream so that ScalingFactor can be
433  // Scaled64(1,64)/Max.
434  Scaled64 ScalingFactor = Min.inverse();
435  if ((Max / Min).lg() < 60)
436    ScalingFactor <<= 3;
437
438  // Translate the floats to integers.
439  DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
440               << ", factor = " << ScalingFactor << "\n");
441  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
442    Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
443    BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
444    DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
445                 << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
446                 << ", int = " << BFI.Freqs[Index].Integer << "\n");
447  }
448}
449
450/// \brief Unwrap a loop package.
451///
452/// Visits all the members of a loop, adjusting their BlockData according to
453/// the loop's pseudo-node.
454static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
455  DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
456               << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
457               << "\n");
458  Loop.Scale *= Loop.Mass.toScaled();
459  Loop.IsPackaged = false;
460  DEBUG(dbgs() << "  => combined-scale = " << Loop.Scale << "\n");
461
462  // Propagate the head scale through the loop.  Since members are visited in
463  // RPO, the head scale will be updated by the loop scale first, and then the
464  // final head scale will be used for updated the rest of the members.
465  for (const BlockNode &N : Loop.Nodes) {
466    const auto &Working = BFI.Working[N.Index];
467    Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
468                                       : BFI.Freqs[N.Index].Scaled;
469    Scaled64 New = Loop.Scale * F;
470    DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
471                 << "\n");
472    F = New;
473  }
474}
475
476void BlockFrequencyInfoImplBase::unwrapLoops() {
477  // Set initial frequencies from loop-local masses.
478  for (size_t Index = 0; Index < Working.size(); ++Index)
479    Freqs[Index].Scaled = Working[Index].Mass.toScaled();
480
481  for (LoopData &Loop : Loops)
482    unwrapLoop(*this, Loop);
483}
484
485void BlockFrequencyInfoImplBase::finalizeMetrics() {
486  // Unwrap loop packages in reverse post-order, tracking min and max
487  // frequencies.
488  auto Min = Scaled64::getLargest();
489  auto Max = Scaled64::getZero();
490  for (size_t Index = 0; Index < Working.size(); ++Index) {
491    // Update min/max scale.
492    Min = std::min(Min, Freqs[Index].Scaled);
493    Max = std::max(Max, Freqs[Index].Scaled);
494  }
495
496  // Convert to integers.
497  convertFloatingToInteger(*this, Min, Max);
498
499  // Clean up data structures.
500  cleanup(*this);
501
502  // Print out the final stats.
503  DEBUG(dump());
504}
505
506BlockFrequency
507BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
508  if (!Node.isValid())
509    return 0;
510  return Freqs[Node.Index].Integer;
511}
512Scaled64
513BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
514  if (!Node.isValid())
515    return Scaled64::getZero();
516  return Freqs[Node.Index].Scaled;
517}
518
519std::string
520BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
521  return std::string();
522}
523std::string
524BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
525  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
526}
527
528raw_ostream &
529BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
530                                           const BlockNode &Node) const {
531  return OS << getFloatingBlockFreq(Node);
532}
533
534raw_ostream &
535BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
536                                           const BlockFrequency &Freq) const {
537  Scaled64 Block(Freq.getFrequency(), 0);
538  Scaled64 Entry(getEntryFreq(), 0);
539
540  return OS << Block / Entry;
541}
542
543void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
544  Start = OuterLoop.getHeader();
545  Nodes.reserve(OuterLoop.Nodes.size());
546  for (auto N : OuterLoop.Nodes)
547    addNode(N);
548  indexNodes();
549}
550void IrreducibleGraph::addNodesInFunction() {
551  Start = 0;
552  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
553    if (!BFI.Working[Index].isPackaged())
554      addNode(Index);
555  indexNodes();
556}
557void IrreducibleGraph::indexNodes() {
558  for (auto &I : Nodes)
559    Lookup[I.Node.Index] = &I;
560}
561void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
562                               const BFIBase::LoopData *OuterLoop) {
563  if (OuterLoop && OuterLoop->isHeader(Succ))
564    return;
565  auto L = Lookup.find(Succ.Index);
566  if (L == Lookup.end())
567    return;
568  IrrNode &SuccIrr = *L->second;
569  Irr.Edges.push_back(&SuccIrr);
570  SuccIrr.Edges.push_front(&Irr);
571  ++SuccIrr.NumIn;
572}
573
574namespace llvm {
575template <> struct GraphTraits<IrreducibleGraph> {
576  typedef bfi_detail::IrreducibleGraph GraphT;
577
578  typedef const GraphT::IrrNode NodeType;
579  typedef GraphT::IrrNode::iterator ChildIteratorType;
580
581  static const NodeType *getEntryNode(const GraphT &G) {
582    return G.StartIrr;
583  }
584  static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
585  static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
586};
587}
588
589/// \brief Find extra irreducible headers.
590///
591/// Find entry blocks and other blocks with backedges, which exist when \c G
592/// contains irreducible sub-SCCs.
593static void findIrreducibleHeaders(
594    const BlockFrequencyInfoImplBase &BFI,
595    const IrreducibleGraph &G,
596    const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
597    LoopData::NodeList &Headers, LoopData::NodeList &Others) {
598  // Map from nodes in the SCC to whether it's an entry block.
599  SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
600
601  // InSCC also acts the set of nodes in the graph.  Seed it.
602  for (const auto *I : SCC)
603    InSCC[I] = false;
604
605  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
606    auto &Irr = *I->first;
607    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
608      if (InSCC.count(P))
609        continue;
610
611      // This is an entry block.
612      I->second = true;
613      Headers.push_back(Irr.Node);
614      DEBUG(dbgs() << "  => entry = " << BFI.getBlockName(Irr.Node) << "\n");
615      break;
616    }
617  }
618  assert(Headers.size() >= 2 && "Should be irreducible");
619  if (Headers.size() == InSCC.size()) {
620    // Every block is a header.
621    std::sort(Headers.begin(), Headers.end());
622    return;
623  }
624
625  // Look for extra headers from irreducible sub-SCCs.
626  for (const auto &I : InSCC) {
627    // Entry blocks are already headers.
628    if (I.second)
629      continue;
630
631    auto &Irr = *I.first;
632    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
633      // Skip forward edges.
634      if (P->Node < Irr.Node)
635        continue;
636
637      // Skip predecessors from entry blocks.  These can have inverted
638      // ordering.
639      if (InSCC.lookup(P))
640        continue;
641
642      // Store the extra header.
643      Headers.push_back(Irr.Node);
644      DEBUG(dbgs() << "  => extra = " << BFI.getBlockName(Irr.Node) << "\n");
645      break;
646    }
647    if (Headers.back() == Irr.Node)
648      // Added this as a header.
649      continue;
650
651    // This is not a header.
652    Others.push_back(Irr.Node);
653    DEBUG(dbgs() << "  => other = " << BFI.getBlockName(Irr.Node) << "\n");
654  }
655  std::sort(Headers.begin(), Headers.end());
656  std::sort(Others.begin(), Others.end());
657}
658
659static void createIrreducibleLoop(
660    BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
661    LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
662    const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
663  // Translate the SCC into RPO.
664  DEBUG(dbgs() << " - found-scc\n");
665
666  LoopData::NodeList Headers;
667  LoopData::NodeList Others;
668  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
669
670  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
671                                Headers.end(), Others.begin(), Others.end());
672
673  // Update loop hierarchy.
674  for (const auto &N : Loop->Nodes)
675    if (BFI.Working[N.Index].isLoopHeader())
676      BFI.Working[N.Index].Loop->Parent = &*Loop;
677    else
678      BFI.Working[N.Index].Loop = &*Loop;
679}
680
681iterator_range<std::list<LoopData>::iterator>
682BlockFrequencyInfoImplBase::analyzeIrreducible(
683    const IrreducibleGraph &G, LoopData *OuterLoop,
684    std::list<LoopData>::iterator Insert) {
685  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
686  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
687
688  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
689    if (I->size() < 2)
690      continue;
691
692    // Translate the SCC into RPO.
693    createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
694  }
695
696  if (OuterLoop)
697    return make_range(std::next(Prev), Insert);
698  return make_range(Loops.begin(), Insert);
699}
700
701void
702BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
703  OuterLoop.Exits.clear();
704  OuterLoop.BackedgeMass = BlockMass::getEmpty();
705  auto O = OuterLoop.Nodes.begin() + 1;
706  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
707    if (!Working[I->Index].isPackaged())
708      *O++ = *I;
709  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
710}
711