1//===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Loops should be simplified before this analysis.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
15#include "llvm/ADT/SCCIterator.h"
16#include "llvm/IR/Function.h"
17#include "llvm/Support/raw_ostream.h"
18#include <numeric>
19
20using namespace llvm;
21using namespace llvm::bfi_detail;
22
23#define DEBUG_TYPE "block-freq"
24
25ScaledNumber<uint64_t> BlockMass::toScaled() const {
26  if (isFull())
27    return ScaledNumber<uint64_t>(1, 0);
28  return ScaledNumber<uint64_t>(getMass() + 1, -64);
29}
30
31LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); }
32
33static char getHexDigit(int N) {
34  assert(N < 16);
35  if (N < 10)
36    return '0' + N;
37  return 'a' + N - 10;
38}
39
40raw_ostream &BlockMass::print(raw_ostream &OS) const {
41  for (int Digits = 0; Digits < 16; ++Digits)
42    OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
43  return OS;
44}
45
46namespace {
47
48typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
49typedef BlockFrequencyInfoImplBase::Distribution Distribution;
50typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
51typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64;
52typedef BlockFrequencyInfoImplBase::LoopData LoopData;
53typedef BlockFrequencyInfoImplBase::Weight Weight;
54typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
55
56/// \brief Dithering mass distributer.
57///
58/// This class splits up a single mass into portions by weight, dithering to
59/// spread out error.  No mass is lost.  The dithering precision depends on the
60/// precision of the product of \a BlockMass and \a BranchProbability.
61///
62/// The distribution algorithm follows.
63///
64///  1. Initialize by saving the sum of the weights in \a RemWeight and the
65///     mass to distribute in \a RemMass.
66///
67///  2. For each portion:
68///
69///      1. Construct a branch probability, P, as the portion's weight divided
70///         by the current value of \a RemWeight.
71///      2. Calculate the portion's mass as \a RemMass times P.
72///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
73///         the current portion's weight and mass.
74struct DitheringDistributer {
75  uint32_t RemWeight;
76  BlockMass RemMass;
77
78  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
79
80  BlockMass takeMass(uint32_t Weight);
81};
82
83} // end anonymous namespace
84
85DitheringDistributer::DitheringDistributer(Distribution &Dist,
86                                           const BlockMass &Mass) {
87  Dist.normalize();
88  RemWeight = Dist.Total;
89  RemMass = Mass;
90}
91
92BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
93  assert(Weight && "invalid weight");
94  assert(Weight <= RemWeight);
95  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
96
97  // Decrement totals (dither).
98  RemWeight -= Weight;
99  RemMass -= Mass;
100  return Mass;
101}
102
103void Distribution::add(const BlockNode &Node, uint64_t Amount,
104                       Weight::DistType Type) {
105  assert(Amount && "invalid weight of 0");
106  uint64_t NewTotal = Total + Amount;
107
108  // Check for overflow.  It should be impossible to overflow twice.
109  bool IsOverflow = NewTotal < Total;
110  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
111  DidOverflow |= IsOverflow;
112
113  // Update the total.
114  Total = NewTotal;
115
116  // Save the weight.
117  Weights.push_back(Weight(Type, Node, Amount));
118}
119
120static void combineWeight(Weight &W, const Weight &OtherW) {
121  assert(OtherW.TargetNode.isValid());
122  if (!W.Amount) {
123    W = OtherW;
124    return;
125  }
126  assert(W.Type == OtherW.Type);
127  assert(W.TargetNode == OtherW.TargetNode);
128  assert(OtherW.Amount && "Expected non-zero weight");
129  if (W.Amount > W.Amount + OtherW.Amount)
130    // Saturate on overflow.
131    W.Amount = UINT64_MAX;
132  else
133    W.Amount += OtherW.Amount;
134}
135
136static void combineWeightsBySorting(WeightList &Weights) {
137  // Sort so edges to the same node are adjacent.
138  std::sort(Weights.begin(), Weights.end(),
139            [](const Weight &L,
140               const Weight &R) { return L.TargetNode < R.TargetNode; });
141
142  // Combine adjacent edges.
143  WeightList::iterator O = Weights.begin();
144  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
145       ++O, (I = L)) {
146    *O = *I;
147
148    // Find the adjacent weights to the same node.
149    for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
150      combineWeight(*O, *L);
151  }
152
153  // Erase extra entries.
154  Weights.erase(O, Weights.end());
155}
156
157static void combineWeightsByHashing(WeightList &Weights) {
158  // Collect weights into a DenseMap.
159  typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
160  HashTable Combined(NextPowerOf2(2 * Weights.size()));
161  for (const Weight &W : Weights)
162    combineWeight(Combined[W.TargetNode.Index], W);
163
164  // Check whether anything changed.
165  if (Weights.size() == Combined.size())
166    return;
167
168  // Fill in the new weights.
169  Weights.clear();
170  Weights.reserve(Combined.size());
171  for (const auto &I : Combined)
172    Weights.push_back(I.second);
173}
174
175static void combineWeights(WeightList &Weights) {
176  // Use a hash table for many successors to keep this linear.
177  if (Weights.size() > 128) {
178    combineWeightsByHashing(Weights);
179    return;
180  }
181
182  combineWeightsBySorting(Weights);
183}
184
185static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
186  assert(Shift >= 0);
187  assert(Shift < 64);
188  if (!Shift)
189    return N;
190  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
191}
192
193void Distribution::normalize() {
194  // Early exit for termination nodes.
195  if (Weights.empty())
196    return;
197
198  // Only bother if there are multiple successors.
199  if (Weights.size() > 1)
200    combineWeights(Weights);
201
202  // Early exit when combined into a single successor.
203  if (Weights.size() == 1) {
204    Total = 1;
205    Weights.front().Amount = 1;
206    return;
207  }
208
209  // Determine how much to shift right so that the total fits into 32-bits.
210  //
211  // If we shift at all, shift by 1 extra.  Otherwise, the lower limit of 1
212  // for each weight can cause a 32-bit overflow.
213  int Shift = 0;
214  if (DidOverflow)
215    Shift = 33;
216  else if (Total > UINT32_MAX)
217    Shift = 33 - countLeadingZeros(Total);
218
219  // Early exit if nothing needs to be scaled.
220  if (!Shift) {
221    // If we didn't overflow then combineWeights() shouldn't have changed the
222    // sum of the weights, but let's double-check.
223    assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
224                                    [](uint64_t Sum, const Weight &W) {
225                      return Sum + W.Amount;
226                    }) &&
227           "Expected total to be correct");
228    return;
229  }
230
231  // Recompute the total through accumulation (rather than shifting it) so that
232  // it's accurate after shifting and any changes combineWeights() made above.
233  Total = 0;
234
235  // Sum the weights to each node and shift right if necessary.
236  for (Weight &W : Weights) {
237    // Scale down below UINT32_MAX.  Since Shift is larger than necessary, we
238    // can round here without concern about overflow.
239    assert(W.TargetNode.isValid());
240    W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
241    assert(W.Amount <= UINT32_MAX);
242
243    // Update the total.
244    Total += W.Amount;
245  }
246  assert(Total <= UINT32_MAX);
247}
248
249void BlockFrequencyInfoImplBase::clear() {
250  // Swap with a default-constructed std::vector, since std::vector<>::clear()
251  // does not actually clear heap storage.
252  std::vector<FrequencyData>().swap(Freqs);
253  std::vector<WorkingData>().swap(Working);
254  Loops.clear();
255}
256
257/// \brief Clear all memory not needed downstream.
258///
259/// Releases all memory not used downstream.  In particular, saves Freqs.
260static void cleanup(BlockFrequencyInfoImplBase &BFI) {
261  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
262  BFI.clear();
263  BFI.Freqs = std::move(SavedFreqs);
264}
265
266bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
267                                           const LoopData *OuterLoop,
268                                           const BlockNode &Pred,
269                                           const BlockNode &Succ,
270                                           uint64_t Weight) {
271  if (!Weight)
272    Weight = 1;
273
274  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
275    return OuterLoop && OuterLoop->isHeader(Node);
276  };
277
278  BlockNode Resolved = Working[Succ.Index].getResolvedNode();
279
280#ifndef NDEBUG
281  auto debugSuccessor = [&](const char *Type) {
282    dbgs() << "  =>"
283           << " [" << Type << "] weight = " << Weight;
284    if (!isLoopHeader(Resolved))
285      dbgs() << ", succ = " << getBlockName(Succ);
286    if (Resolved != Succ)
287      dbgs() << ", resolved = " << getBlockName(Resolved);
288    dbgs() << "\n";
289  };
290  (void)debugSuccessor;
291#endif
292
293  if (isLoopHeader(Resolved)) {
294    DEBUG(debugSuccessor("backedge"));
295    Dist.addBackedge(Resolved, Weight);
296    return true;
297  }
298
299  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
300    DEBUG(debugSuccessor("  exit  "));
301    Dist.addExit(Resolved, Weight);
302    return true;
303  }
304
305  if (Resolved < Pred) {
306    if (!isLoopHeader(Pred)) {
307      // If OuterLoop is an irreducible loop, we can't actually handle this.
308      assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
309             "unhandled irreducible control flow");
310
311      // Irreducible backedge.  Abort.
312      DEBUG(debugSuccessor("abort!!!"));
313      return false;
314    }
315
316    // If "Pred" is a loop header, then this isn't really a backedge; rather,
317    // OuterLoop must be irreducible.  These false backedges can come only from
318    // secondary loop headers.
319    assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
320           "unhandled irreducible control flow");
321  }
322
323  DEBUG(debugSuccessor(" local  "));
324  Dist.addLocal(Resolved, Weight);
325  return true;
326}
327
328bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
329    const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
330  // Copy the exit map into Dist.
331  for (const auto &I : Loop.Exits)
332    if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
333                   I.second.getMass()))
334      // Irreducible backedge.
335      return false;
336
337  return true;
338}
339
340/// \brief Compute the loop scale for a loop.
341void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
342  // Compute loop scale.
343  DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
344
345  // Infinite loops need special handling. If we give the back edge an infinite
346  // mass, they may saturate all the other scales in the function down to 1,
347  // making all the other region temperatures look exactly the same. Choose an
348  // arbitrary scale to avoid these issues.
349  //
350  // FIXME: An alternate way would be to select a symbolic scale which is later
351  // replaced to be the maximum of all computed scales plus 1. This would
352  // appropriately describe the loop as having a large scale, without skewing
353  // the final frequency computation.
354  const Scaled64 InfiniteLoopScale(1, 12);
355
356  // LoopScale == 1 / ExitMass
357  // ExitMass == HeadMass - BackedgeMass
358  BlockMass TotalBackedgeMass;
359  for (auto &Mass : Loop.BackedgeMass)
360    TotalBackedgeMass += Mass;
361  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
362
363  // Block scale stores the inverse of the scale. If this is an infinite loop,
364  // its exit mass will be zero. In this case, use an arbitrary scale for the
365  // loop scale.
366  Loop.Scale =
367      ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
368
369  DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
370               << " - " << TotalBackedgeMass << ")\n"
371               << " - scale = " << Loop.Scale << "\n");
372}
373
374/// \brief Package up a loop.
375void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
376  DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
377
378  // Clear the subloop exits to prevent quadratic memory usage.
379  for (const BlockNode &M : Loop.Nodes) {
380    if (auto *Loop = Working[M.Index].getPackagedLoop())
381      Loop->Exits.clear();
382    DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
383  }
384  Loop.IsPackaged = true;
385}
386
387#ifndef NDEBUG
388static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
389                        const DitheringDistributer &D, const BlockNode &T,
390                        const BlockMass &M, const char *Desc) {
391  dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
392  if (Desc)
393    dbgs() << " [" << Desc << "]";
394  if (T.isValid())
395    dbgs() << " to " << BFI.getBlockName(T);
396  dbgs() << "\n";
397}
398#endif
399
400void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
401                                                LoopData *OuterLoop,
402                                                Distribution &Dist) {
403  BlockMass Mass = Working[Source.Index].getMass();
404  DEBUG(dbgs() << "  => mass:  " << Mass << "\n");
405
406  // Distribute mass to successors as laid out in Dist.
407  DitheringDistributer D(Dist, Mass);
408
409  for (const Weight &W : Dist.Weights) {
410    // Check for a local edge (non-backedge and non-exit).
411    BlockMass Taken = D.takeMass(W.Amount);
412    if (W.Type == Weight::Local) {
413      Working[W.TargetNode.Index].getMass() += Taken;
414      DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
415      continue;
416    }
417
418    // Backedges and exits only make sense if we're processing a loop.
419    assert(OuterLoop && "backedge or exit outside of loop");
420
421    // Check for a backedge.
422    if (W.Type == Weight::Backedge) {
423      OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
424      DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
425      continue;
426    }
427
428    // This must be an exit.
429    assert(W.Type == Weight::Exit);
430    OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
431    DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
432  }
433}
434
435static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
436                                     const Scaled64 &Min, const Scaled64 &Max) {
437  // Scale the Factor to a size that creates integers.  Ideally, integers would
438  // be scaled so that Max == UINT64_MAX so that they can be best
439  // differentiated.  However, in the presence of large frequency values, small
440  // frequencies are scaled down to 1, making it impossible to differentiate
441  // small, unequal numbers. When the spread between Min and Max frequencies
442  // fits well within MaxBits, we make the scale be at least 8.
443  const unsigned MaxBits = 64;
444  const unsigned SpreadBits = (Max / Min).lg();
445  Scaled64 ScalingFactor;
446  if (SpreadBits <= MaxBits - 3) {
447    // If the values are small enough, make the scaling factor at least 8 to
448    // allow distinguishing small values.
449    ScalingFactor = Min.inverse();
450    ScalingFactor <<= 3;
451  } else {
452    // If the values need more than MaxBits to be represented, saturate small
453    // frequency values down to 1 by using a scaling factor that benefits large
454    // frequency values.
455    ScalingFactor = Scaled64(1, MaxBits) / Max;
456  }
457
458  // Translate the floats to integers.
459  DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
460               << ", factor = " << ScalingFactor << "\n");
461  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
462    Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
463    BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
464    DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
465                 << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
466                 << ", int = " << BFI.Freqs[Index].Integer << "\n");
467  }
468}
469
470/// \brief Unwrap a loop package.
471///
472/// Visits all the members of a loop, adjusting their BlockData according to
473/// the loop's pseudo-node.
474static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
475  DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
476               << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
477               << "\n");
478  Loop.Scale *= Loop.Mass.toScaled();
479  Loop.IsPackaged = false;
480  DEBUG(dbgs() << "  => combined-scale = " << Loop.Scale << "\n");
481
482  // Propagate the head scale through the loop.  Since members are visited in
483  // RPO, the head scale will be updated by the loop scale first, and then the
484  // final head scale will be used for updated the rest of the members.
485  for (const BlockNode &N : Loop.Nodes) {
486    const auto &Working = BFI.Working[N.Index];
487    Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
488                                       : BFI.Freqs[N.Index].Scaled;
489    Scaled64 New = Loop.Scale * F;
490    DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
491                 << "\n");
492    F = New;
493  }
494}
495
496void BlockFrequencyInfoImplBase::unwrapLoops() {
497  // Set initial frequencies from loop-local masses.
498  for (size_t Index = 0; Index < Working.size(); ++Index)
499    Freqs[Index].Scaled = Working[Index].Mass.toScaled();
500
501  for (LoopData &Loop : Loops)
502    unwrapLoop(*this, Loop);
503}
504
505void BlockFrequencyInfoImplBase::finalizeMetrics() {
506  // Unwrap loop packages in reverse post-order, tracking min and max
507  // frequencies.
508  auto Min = Scaled64::getLargest();
509  auto Max = Scaled64::getZero();
510  for (size_t Index = 0; Index < Working.size(); ++Index) {
511    // Update min/max scale.
512    Min = std::min(Min, Freqs[Index].Scaled);
513    Max = std::max(Max, Freqs[Index].Scaled);
514  }
515
516  // Convert to integers.
517  convertFloatingToInteger(*this, Min, Max);
518
519  // Clean up data structures.
520  cleanup(*this);
521
522  // Print out the final stats.
523  DEBUG(dump());
524}
525
526BlockFrequency
527BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
528  if (!Node.isValid())
529    return 0;
530  return Freqs[Node.Index].Integer;
531}
532
533Optional<uint64_t>
534BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
535                                                 const BlockNode &Node) const {
536  auto EntryCount = F.getEntryCount();
537  if (!EntryCount)
538    return None;
539  // Use 128 bit APInt to do the arithmetic to avoid overflow.
540  APInt BlockCount(128, EntryCount.getValue());
541  APInt BlockFreq(128, getBlockFreq(Node).getFrequency());
542  APInt EntryFreq(128, getEntryFreq());
543  BlockCount *= BlockFreq;
544  BlockCount = BlockCount.udiv(EntryFreq);
545  return BlockCount.getLimitedValue();
546}
547
548Scaled64
549BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
550  if (!Node.isValid())
551    return Scaled64::getZero();
552  return Freqs[Node.Index].Scaled;
553}
554
555void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
556                                              uint64_t Freq) {
557  assert(Node.isValid() && "Expected valid node");
558  assert(Node.Index < Freqs.size() && "Expected legal index");
559  Freqs[Node.Index].Integer = Freq;
560}
561
562std::string
563BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
564  return std::string();
565}
566
567std::string
568BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
569  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
570}
571
572raw_ostream &
573BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
574                                           const BlockNode &Node) const {
575  return OS << getFloatingBlockFreq(Node);
576}
577
578raw_ostream &
579BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
580                                           const BlockFrequency &Freq) const {
581  Scaled64 Block(Freq.getFrequency(), 0);
582  Scaled64 Entry(getEntryFreq(), 0);
583
584  return OS << Block / Entry;
585}
586
587void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
588  Start = OuterLoop.getHeader();
589  Nodes.reserve(OuterLoop.Nodes.size());
590  for (auto N : OuterLoop.Nodes)
591    addNode(N);
592  indexNodes();
593}
594
595void IrreducibleGraph::addNodesInFunction() {
596  Start = 0;
597  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
598    if (!BFI.Working[Index].isPackaged())
599      addNode(Index);
600  indexNodes();
601}
602
603void IrreducibleGraph::indexNodes() {
604  for (auto &I : Nodes)
605    Lookup[I.Node.Index] = &I;
606}
607
608void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
609                               const BFIBase::LoopData *OuterLoop) {
610  if (OuterLoop && OuterLoop->isHeader(Succ))
611    return;
612  auto L = Lookup.find(Succ.Index);
613  if (L == Lookup.end())
614    return;
615  IrrNode &SuccIrr = *L->second;
616  Irr.Edges.push_back(&SuccIrr);
617  SuccIrr.Edges.push_front(&Irr);
618  ++SuccIrr.NumIn;
619}
620
621namespace llvm {
622template <> struct GraphTraits<IrreducibleGraph> {
623  typedef bfi_detail::IrreducibleGraph GraphT;
624
625  typedef const GraphT::IrrNode NodeType;
626  typedef GraphT::IrrNode::iterator ChildIteratorType;
627
628  static const NodeType *getEntryNode(const GraphT &G) {
629    return G.StartIrr;
630  }
631  static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
632  static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
633};
634} // end namespace llvm
635
636/// \brief Find extra irreducible headers.
637///
638/// Find entry blocks and other blocks with backedges, which exist when \c G
639/// contains irreducible sub-SCCs.
640static void findIrreducibleHeaders(
641    const BlockFrequencyInfoImplBase &BFI,
642    const IrreducibleGraph &G,
643    const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
644    LoopData::NodeList &Headers, LoopData::NodeList &Others) {
645  // Map from nodes in the SCC to whether it's an entry block.
646  SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
647
648  // InSCC also acts the set of nodes in the graph.  Seed it.
649  for (const auto *I : SCC)
650    InSCC[I] = false;
651
652  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
653    auto &Irr = *I->first;
654    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
655      if (InSCC.count(P))
656        continue;
657
658      // This is an entry block.
659      I->second = true;
660      Headers.push_back(Irr.Node);
661      DEBUG(dbgs() << "  => entry = " << BFI.getBlockName(Irr.Node) << "\n");
662      break;
663    }
664  }
665  assert(Headers.size() >= 2 &&
666         "Expected irreducible CFG; -loop-info is likely invalid");
667  if (Headers.size() == InSCC.size()) {
668    // Every block is a header.
669    std::sort(Headers.begin(), Headers.end());
670    return;
671  }
672
673  // Look for extra headers from irreducible sub-SCCs.
674  for (const auto &I : InSCC) {
675    // Entry blocks are already headers.
676    if (I.second)
677      continue;
678
679    auto &Irr = *I.first;
680    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
681      // Skip forward edges.
682      if (P->Node < Irr.Node)
683        continue;
684
685      // Skip predecessors from entry blocks.  These can have inverted
686      // ordering.
687      if (InSCC.lookup(P))
688        continue;
689
690      // Store the extra header.
691      Headers.push_back(Irr.Node);
692      DEBUG(dbgs() << "  => extra = " << BFI.getBlockName(Irr.Node) << "\n");
693      break;
694    }
695    if (Headers.back() == Irr.Node)
696      // Added this as a header.
697      continue;
698
699    // This is not a header.
700    Others.push_back(Irr.Node);
701    DEBUG(dbgs() << "  => other = " << BFI.getBlockName(Irr.Node) << "\n");
702  }
703  std::sort(Headers.begin(), Headers.end());
704  std::sort(Others.begin(), Others.end());
705}
706
707static void createIrreducibleLoop(
708    BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
709    LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
710    const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
711  // Translate the SCC into RPO.
712  DEBUG(dbgs() << " - found-scc\n");
713
714  LoopData::NodeList Headers;
715  LoopData::NodeList Others;
716  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
717
718  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
719                                Headers.end(), Others.begin(), Others.end());
720
721  // Update loop hierarchy.
722  for (const auto &N : Loop->Nodes)
723    if (BFI.Working[N.Index].isLoopHeader())
724      BFI.Working[N.Index].Loop->Parent = &*Loop;
725    else
726      BFI.Working[N.Index].Loop = &*Loop;
727}
728
729iterator_range<std::list<LoopData>::iterator>
730BlockFrequencyInfoImplBase::analyzeIrreducible(
731    const IrreducibleGraph &G, LoopData *OuterLoop,
732    std::list<LoopData>::iterator Insert) {
733  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
734  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
735
736  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
737    if (I->size() < 2)
738      continue;
739
740    // Translate the SCC into RPO.
741    createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
742  }
743
744  if (OuterLoop)
745    return make_range(std::next(Prev), Insert);
746  return make_range(Loops.begin(), Insert);
747}
748
749void
750BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
751  OuterLoop.Exits.clear();
752  for (auto &Mass : OuterLoop.BackedgeMass)
753    Mass = BlockMass::getEmpty();
754  auto O = OuterLoop.Nodes.begin() + 1;
755  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
756    if (!Working[I->Index].isPackaged())
757      *O++ = *I;
758  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
759}
760
761void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
762  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
763
764  // Since the loop has more than one header block, the mass flowing back into
765  // each header will be different. Adjust the mass in each header loop to
766  // reflect the masses flowing through back edges.
767  //
768  // To do this, we distribute the initial mass using the backedge masses
769  // as weights for the distribution.
770  BlockMass LoopMass = BlockMass::getFull();
771  Distribution Dist;
772
773  DEBUG(dbgs() << "adjust-loop-header-mass:\n");
774  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
775    auto &HeaderNode = Loop.Nodes[H];
776    auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
777    DEBUG(dbgs() << " - Add back edge mass for node "
778                 << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
779    if (BackedgeMass.getMass() > 0)
780      Dist.addLocal(HeaderNode, BackedgeMass.getMass());
781    else
782      DEBUG(dbgs() << "   Nothing added. Back edge mass is zero\n");
783  }
784
785  DitheringDistributer D(Dist, LoopMass);
786
787  DEBUG(dbgs() << " Distribute loop mass " << LoopMass
788               << " to headers using above weights\n");
789  for (const Weight &W : Dist.Weights) {
790    BlockMass Taken = D.takeMass(W.Amount);
791    assert(W.Type == Weight::Local && "all weights should be local");
792    Working[W.TargetNode.Index].getMass() = Taken;
793    DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
794  }
795}
796