1//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass forwards branches to unconditional branches to make them branch
11// directly to the target block.  This pass often results in dead MBB's, which
12// it then removes.
13//
14// Note that this pass must be run after register allocation, it cannot handle
15// SSA form.
16//
17//===----------------------------------------------------------------------===//
18
19#include "BranchFolding.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
24#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineJumpTableInfo.h"
27#include "llvm/CodeGen/MachineMemOperand.h"
28#include "llvm/CodeGen/MachineModuleInfo.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/CodeGen/Passes.h"
31#include "llvm/CodeGen/RegisterScavenging.h"
32#include "llvm/IR/Function.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/raw_ostream.h"
37#include "llvm/Target/TargetInstrInfo.h"
38#include "llvm/Target/TargetRegisterInfo.h"
39#include "llvm/Target/TargetSubtargetInfo.h"
40#include <algorithm>
41using namespace llvm;
42
43#define DEBUG_TYPE "branchfolding"
44
45STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
46STATISTIC(NumBranchOpts, "Number of branches optimized");
47STATISTIC(NumTailMerge , "Number of block tails merged");
48STATISTIC(NumHoist     , "Number of times common instructions are hoisted");
49
50static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
51                              cl::init(cl::BOU_UNSET), cl::Hidden);
52
53// Throttle for huge numbers of predecessors (compile speed problems)
54static cl::opt<unsigned>
55TailMergeThreshold("tail-merge-threshold",
56          cl::desc("Max number of predecessors to consider tail merging"),
57          cl::init(150), cl::Hidden);
58
59// Heuristic for tail merging (and, inversely, tail duplication).
60// TODO: This should be replaced with a target query.
61static cl::opt<unsigned>
62TailMergeSize("tail-merge-size",
63          cl::desc("Min number of instructions to consider tail merging"),
64                              cl::init(3), cl::Hidden);
65
66namespace {
67  /// BranchFolderPass - Wrap branch folder in a machine function pass.
68  class BranchFolderPass : public MachineFunctionPass {
69  public:
70    static char ID;
71    explicit BranchFolderPass(): MachineFunctionPass(ID) {}
72
73    bool runOnMachineFunction(MachineFunction &MF) override;
74
75    void getAnalysisUsage(AnalysisUsage &AU) const override {
76      AU.addRequired<MachineBlockFrequencyInfo>();
77      AU.addRequired<MachineBranchProbabilityInfo>();
78      AU.addRequired<TargetPassConfig>();
79      MachineFunctionPass::getAnalysisUsage(AU);
80    }
81  };
82}
83
84char BranchFolderPass::ID = 0;
85char &llvm::BranchFolderPassID = BranchFolderPass::ID;
86
87INITIALIZE_PASS(BranchFolderPass, "branch-folder",
88                "Control Flow Optimizer", false, false)
89
90bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
91  if (skipOptnoneFunction(*MF.getFunction()))
92    return false;
93
94  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
95  // TailMerge can create jump into if branches that make CFG irreducible for
96  // HW that requires structurized CFG.
97  bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
98      PassConfig->getEnableTailMerge();
99  BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true,
100                      getAnalysis<MachineBlockFrequencyInfo>(),
101                      getAnalysis<MachineBranchProbabilityInfo>());
102  return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
103                                 MF.getSubtarget().getRegisterInfo(),
104                                 getAnalysisIfAvailable<MachineModuleInfo>());
105}
106
107BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
108                           const MachineBlockFrequencyInfo &FreqInfo,
109                           const MachineBranchProbabilityInfo &ProbInfo)
110    : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo),
111      MBPI(ProbInfo) {
112  switch (FlagEnableTailMerge) {
113  case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
114  case cl::BOU_TRUE: EnableTailMerge = true; break;
115  case cl::BOU_FALSE: EnableTailMerge = false; break;
116  }
117}
118
119/// RemoveDeadBlock - Remove the specified dead machine basic block from the
120/// function, updating the CFG.
121void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
122  assert(MBB->pred_empty() && "MBB must be dead!");
123  DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
124
125  MachineFunction *MF = MBB->getParent();
126  // drop all successors.
127  while (!MBB->succ_empty())
128    MBB->removeSuccessor(MBB->succ_end()-1);
129
130  // Avoid matching if this pointer gets reused.
131  TriedMerging.erase(MBB);
132
133  // Remove the block.
134  MF->erase(MBB);
135}
136
137/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
138/// followed by terminators, and if the implicitly defined registers are not
139/// used by the terminators, remove those implicit_def's. e.g.
140/// BB1:
141///   r0 = implicit_def
142///   r1 = implicit_def
143///   br
144/// This block can be optimized away later if the implicit instructions are
145/// removed.
146bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
147  SmallSet<unsigned, 4> ImpDefRegs;
148  MachineBasicBlock::iterator I = MBB->begin();
149  while (I != MBB->end()) {
150    if (!I->isImplicitDef())
151      break;
152    unsigned Reg = I->getOperand(0).getReg();
153    for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
154         SubRegs.isValid(); ++SubRegs)
155      ImpDefRegs.insert(*SubRegs);
156    ++I;
157  }
158  if (ImpDefRegs.empty())
159    return false;
160
161  MachineBasicBlock::iterator FirstTerm = I;
162  while (I != MBB->end()) {
163    if (!TII->isUnpredicatedTerminator(I))
164      return false;
165    // See if it uses any of the implicitly defined registers.
166    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
167      MachineOperand &MO = I->getOperand(i);
168      if (!MO.isReg() || !MO.isUse())
169        continue;
170      unsigned Reg = MO.getReg();
171      if (ImpDefRegs.count(Reg))
172        return false;
173    }
174    ++I;
175  }
176
177  I = MBB->begin();
178  while (I != FirstTerm) {
179    MachineInstr *ImpDefMI = &*I;
180    ++I;
181    MBB->erase(ImpDefMI);
182  }
183
184  return true;
185}
186
187/// OptimizeFunction - Perhaps branch folding, tail merging and other
188/// CFG optimizations on the given function.
189bool BranchFolder::OptimizeFunction(MachineFunction &MF,
190                                    const TargetInstrInfo *tii,
191                                    const TargetRegisterInfo *tri,
192                                    MachineModuleInfo *mmi) {
193  if (!tii) return false;
194
195  TriedMerging.clear();
196
197  TII = tii;
198  TRI = tri;
199  MMI = mmi;
200  RS = nullptr;
201
202  // Use a RegScavenger to help update liveness when required.
203  MachineRegisterInfo &MRI = MF.getRegInfo();
204  if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
205    RS = new RegScavenger();
206  else
207    MRI.invalidateLiveness();
208
209  // Fix CFG.  The later algorithms expect it to be right.
210  bool MadeChange = false;
211  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
212    MachineBasicBlock *MBB = I, *TBB = nullptr, *FBB = nullptr;
213    SmallVector<MachineOperand, 4> Cond;
214    if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
215      MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
216    MadeChange |= OptimizeImpDefsBlock(MBB);
217  }
218
219  bool MadeChangeThisIteration = true;
220  while (MadeChangeThisIteration) {
221    MadeChangeThisIteration    = TailMergeBlocks(MF);
222    MadeChangeThisIteration   |= OptimizeBranches(MF);
223    if (EnableHoistCommonCode)
224      MadeChangeThisIteration |= HoistCommonCode(MF);
225    MadeChange |= MadeChangeThisIteration;
226  }
227
228  // See if any jump tables have become dead as the code generator
229  // did its thing.
230  MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
231  if (!JTI) {
232    delete RS;
233    return MadeChange;
234  }
235
236  // Walk the function to find jump tables that are live.
237  BitVector JTIsLive(JTI->getJumpTables().size());
238  for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
239       BB != E; ++BB) {
240    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
241         I != E; ++I)
242      for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
243        MachineOperand &Op = I->getOperand(op);
244        if (!Op.isJTI()) continue;
245
246        // Remember that this JT is live.
247        JTIsLive.set(Op.getIndex());
248      }
249  }
250
251  // Finally, remove dead jump tables.  This happens when the
252  // indirect jump was unreachable (and thus deleted).
253  for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
254    if (!JTIsLive.test(i)) {
255      JTI->RemoveJumpTable(i);
256      MadeChange = true;
257    }
258
259  delete RS;
260  return MadeChange;
261}
262
263//===----------------------------------------------------------------------===//
264//  Tail Merging of Blocks
265//===----------------------------------------------------------------------===//
266
267/// HashMachineInstr - Compute a hash value for MI and its operands.
268static unsigned HashMachineInstr(const MachineInstr *MI) {
269  unsigned Hash = MI->getOpcode();
270  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
271    const MachineOperand &Op = MI->getOperand(i);
272
273    // Merge in bits from the operand if easy.
274    unsigned OperandHash = 0;
275    switch (Op.getType()) {
276    case MachineOperand::MO_Register:          OperandHash = Op.getReg(); break;
277    case MachineOperand::MO_Immediate:         OperandHash = Op.getImm(); break;
278    case MachineOperand::MO_MachineBasicBlock:
279      OperandHash = Op.getMBB()->getNumber();
280      break;
281    case MachineOperand::MO_FrameIndex:
282    case MachineOperand::MO_ConstantPoolIndex:
283    case MachineOperand::MO_JumpTableIndex:
284      OperandHash = Op.getIndex();
285      break;
286    case MachineOperand::MO_GlobalAddress:
287    case MachineOperand::MO_ExternalSymbol:
288      // Global address / external symbol are too hard, don't bother, but do
289      // pull in the offset.
290      OperandHash = Op.getOffset();
291      break;
292    default: break;
293    }
294
295    Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
296  }
297  return Hash;
298}
299
300/// HashEndOfMBB - Hash the last instruction in the MBB.
301static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) {
302  MachineBasicBlock::const_iterator I = MBB->end();
303  if (I == MBB->begin())
304    return 0;   // Empty MBB.
305
306  --I;
307  // Skip debug info so it will not affect codegen.
308  while (I->isDebugValue()) {
309    if (I==MBB->begin())
310      return 0;      // MBB empty except for debug info.
311    --I;
312  }
313
314  return HashMachineInstr(I);
315}
316
317/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
318/// of instructions they actually have in common together at their end.  Return
319/// iterators for the first shared instruction in each block.
320static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
321                                        MachineBasicBlock *MBB2,
322                                        MachineBasicBlock::iterator &I1,
323                                        MachineBasicBlock::iterator &I2) {
324  I1 = MBB1->end();
325  I2 = MBB2->end();
326
327  unsigned TailLen = 0;
328  while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
329    --I1; --I2;
330    // Skip debugging pseudos; necessary to avoid changing the code.
331    while (I1->isDebugValue()) {
332      if (I1==MBB1->begin()) {
333        while (I2->isDebugValue()) {
334          if (I2==MBB2->begin())
335            // I1==DBG at begin; I2==DBG at begin
336            return TailLen;
337          --I2;
338        }
339        ++I2;
340        // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
341        return TailLen;
342      }
343      --I1;
344    }
345    // I1==first (untested) non-DBG preceding known match
346    while (I2->isDebugValue()) {
347      if (I2==MBB2->begin()) {
348        ++I1;
349        // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
350        return TailLen;
351      }
352      --I2;
353    }
354    // I1, I2==first (untested) non-DBGs preceding known match
355    if (!I1->isIdenticalTo(I2) ||
356        // FIXME: This check is dubious. It's used to get around a problem where
357        // people incorrectly expect inline asm directives to remain in the same
358        // relative order. This is untenable because normal compiler
359        // optimizations (like this one) may reorder and/or merge these
360        // directives.
361        I1->isInlineAsm()) {
362      ++I1; ++I2;
363      break;
364    }
365    ++TailLen;
366  }
367  // Back past possible debugging pseudos at beginning of block.  This matters
368  // when one block differs from the other only by whether debugging pseudos
369  // are present at the beginning.  (This way, the various checks later for
370  // I1==MBB1->begin() work as expected.)
371  if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
372    --I2;
373    while (I2->isDebugValue()) {
374      if (I2 == MBB2->begin())
375        return TailLen;
376      --I2;
377    }
378    ++I2;
379  }
380  if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
381    --I1;
382    while (I1->isDebugValue()) {
383      if (I1 == MBB1->begin())
384        return TailLen;
385      --I1;
386    }
387    ++I1;
388  }
389  return TailLen;
390}
391
392void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
393                                   MachineBasicBlock *NewMBB) {
394  if (RS) {
395    RS->enterBasicBlock(CurMBB);
396    if (!CurMBB->empty())
397      RS->forward(std::prev(CurMBB->end()));
398    for (unsigned int i = 1, e = TRI->getNumRegs(); i != e; i++)
399      if (RS->isRegUsed(i, false))
400        NewMBB->addLiveIn(i);
401  }
402}
403
404/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
405/// after it, replacing it with an unconditional branch to NewDest.
406void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
407                                           MachineBasicBlock *NewDest) {
408  MachineBasicBlock *CurMBB = OldInst->getParent();
409
410  TII->ReplaceTailWithBranchTo(OldInst, NewDest);
411
412  // For targets that use the register scavenger, we must maintain LiveIns.
413  MaintainLiveIns(CurMBB, NewDest);
414
415  ++NumTailMerge;
416}
417
418/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
419/// MBB so that the part before the iterator falls into the part starting at the
420/// iterator.  This returns the new MBB.
421MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
422                                            MachineBasicBlock::iterator BBI1,
423                                            const BasicBlock *BB) {
424  if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
425    return nullptr;
426
427  MachineFunction &MF = *CurMBB.getParent();
428
429  // Create the fall-through block.
430  MachineFunction::iterator MBBI = &CurMBB;
431  MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB);
432  CurMBB.getParent()->insert(++MBBI, NewMBB);
433
434  // Move all the successors of this block to the specified block.
435  NewMBB->transferSuccessors(&CurMBB);
436
437  // Add an edge from CurMBB to NewMBB for the fall-through.
438  CurMBB.addSuccessor(NewMBB);
439
440  // Splice the code over.
441  NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
442
443  // NewMBB inherits CurMBB's block frequency.
444  MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
445
446  // For targets that use the register scavenger, we must maintain LiveIns.
447  MaintainLiveIns(&CurMBB, NewMBB);
448
449  return NewMBB;
450}
451
452/// EstimateRuntime - Make a rough estimate for how long it will take to run
453/// the specified code.
454static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
455                                MachineBasicBlock::iterator E) {
456  unsigned Time = 0;
457  for (; I != E; ++I) {
458    if (I->isDebugValue())
459      continue;
460    if (I->isCall())
461      Time += 10;
462    else if (I->mayLoad() || I->mayStore())
463      Time += 2;
464    else
465      ++Time;
466  }
467  return Time;
468}
469
470// CurMBB needs to add an unconditional branch to SuccMBB (we removed these
471// branches temporarily for tail merging).  In the case where CurMBB ends
472// with a conditional branch to the next block, optimize by reversing the
473// test and conditionally branching to SuccMBB instead.
474static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
475                    const TargetInstrInfo *TII) {
476  MachineFunction *MF = CurMBB->getParent();
477  MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB));
478  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
479  SmallVector<MachineOperand, 4> Cond;
480  DebugLoc dl;  // FIXME: this is nowhere
481  if (I != MF->end() &&
482      !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
483    MachineBasicBlock *NextBB = I;
484    if (TBB == NextBB && !Cond.empty() && !FBB) {
485      if (!TII->ReverseBranchCondition(Cond)) {
486        TII->RemoveBranch(*CurMBB);
487        TII->InsertBranch(*CurMBB, SuccBB, nullptr, Cond, dl);
488        return;
489      }
490    }
491  }
492  TII->InsertBranch(*CurMBB, SuccBB, nullptr,
493                    SmallVector<MachineOperand, 0>(), dl);
494}
495
496bool
497BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
498  if (getHash() < o.getHash())
499    return true;
500  if (getHash() > o.getHash())
501    return false;
502  if (getBlock()->getNumber() < o.getBlock()->getNumber())
503    return true;
504  if (getBlock()->getNumber() > o.getBlock()->getNumber())
505    return false;
506  // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
507  // an object with itself.
508#ifndef _GLIBCXX_DEBUG
509  llvm_unreachable("Predecessor appears twice");
510#else
511  return false;
512#endif
513}
514
515BlockFrequency
516BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const {
517  auto I = MergedBBFreq.find(MBB);
518
519  if (I != MergedBBFreq.end())
520    return I->second;
521
522  return MBFI.getBlockFreq(MBB);
523}
524
525void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
526                                             BlockFrequency F) {
527  MergedBBFreq[MBB] = F;
528}
529
530/// CountTerminators - Count the number of terminators in the given
531/// block and set I to the position of the first non-terminator, if there
532/// is one, or MBB->end() otherwise.
533static unsigned CountTerminators(MachineBasicBlock *MBB,
534                                 MachineBasicBlock::iterator &I) {
535  I = MBB->end();
536  unsigned NumTerms = 0;
537  for (;;) {
538    if (I == MBB->begin()) {
539      I = MBB->end();
540      break;
541    }
542    --I;
543    if (!I->isTerminator()) break;
544    ++NumTerms;
545  }
546  return NumTerms;
547}
548
549/// ProfitableToMerge - Check if two machine basic blocks have a common tail
550/// and decide if it would be profitable to merge those tails.  Return the
551/// length of the common tail and iterators to the first common instruction
552/// in each block.
553static bool ProfitableToMerge(MachineBasicBlock *MBB1,
554                              MachineBasicBlock *MBB2,
555                              unsigned minCommonTailLength,
556                              unsigned &CommonTailLen,
557                              MachineBasicBlock::iterator &I1,
558                              MachineBasicBlock::iterator &I2,
559                              MachineBasicBlock *SuccBB,
560                              MachineBasicBlock *PredBB) {
561  CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
562  if (CommonTailLen == 0)
563    return false;
564  DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber()
565               << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen
566               << '\n');
567
568  // It's almost always profitable to merge any number of non-terminator
569  // instructions with the block that falls through into the common successor.
570  if (MBB1 == PredBB || MBB2 == PredBB) {
571    MachineBasicBlock::iterator I;
572    unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
573    if (CommonTailLen > NumTerms)
574      return true;
575  }
576
577  // If one of the blocks can be completely merged and happens to be in
578  // a position where the other could fall through into it, merge any number
579  // of instructions, because it can be done without a branch.
580  // TODO: If the blocks are not adjacent, move one of them so that they are?
581  if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
582    return true;
583  if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
584    return true;
585
586  // If both blocks have an unconditional branch temporarily stripped out,
587  // count that as an additional common instruction for the following
588  // heuristics.
589  unsigned EffectiveTailLen = CommonTailLen;
590  if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
591      !MBB1->back().isBarrier() &&
592      !MBB2->back().isBarrier())
593    ++EffectiveTailLen;
594
595  // Check if the common tail is long enough to be worthwhile.
596  if (EffectiveTailLen >= minCommonTailLength)
597    return true;
598
599  // If we are optimizing for code size, 2 instructions in common is enough if
600  // we don't have to split a block.  At worst we will be introducing 1 new
601  // branch instruction, which is likely to be smaller than the 2
602  // instructions that would be deleted in the merge.
603  MachineFunction *MF = MBB1->getParent();
604  if (EffectiveTailLen >= 2 &&
605      MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
606      (I1 == MBB1->begin() || I2 == MBB2->begin()))
607    return true;
608
609  return false;
610}
611
612/// ComputeSameTails - Look through all the blocks in MergePotentials that have
613/// hash CurHash (guaranteed to match the last element).  Build the vector
614/// SameTails of all those that have the (same) largest number of instructions
615/// in common of any pair of these blocks.  SameTails entries contain an
616/// iterator into MergePotentials (from which the MachineBasicBlock can be
617/// found) and a MachineBasicBlock::iterator into that MBB indicating the
618/// instruction where the matching code sequence begins.
619/// Order of elements in SameTails is the reverse of the order in which
620/// those blocks appear in MergePotentials (where they are not necessarily
621/// consecutive).
622unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
623                                        unsigned minCommonTailLength,
624                                        MachineBasicBlock *SuccBB,
625                                        MachineBasicBlock *PredBB) {
626  unsigned maxCommonTailLength = 0U;
627  SameTails.clear();
628  MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
629  MPIterator HighestMPIter = std::prev(MergePotentials.end());
630  for (MPIterator CurMPIter = std::prev(MergePotentials.end()),
631                  B = MergePotentials.begin();
632       CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) {
633    for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) {
634      unsigned CommonTailLen;
635      if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
636                            minCommonTailLength,
637                            CommonTailLen, TrialBBI1, TrialBBI2,
638                            SuccBB, PredBB)) {
639        if (CommonTailLen > maxCommonTailLength) {
640          SameTails.clear();
641          maxCommonTailLength = CommonTailLen;
642          HighestMPIter = CurMPIter;
643          SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
644        }
645        if (HighestMPIter == CurMPIter &&
646            CommonTailLen == maxCommonTailLength)
647          SameTails.push_back(SameTailElt(I, TrialBBI2));
648      }
649      if (I == B)
650        break;
651    }
652  }
653  return maxCommonTailLength;
654}
655
656/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
657/// MergePotentials, restoring branches at ends of blocks as appropriate.
658void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
659                                        MachineBasicBlock *SuccBB,
660                                        MachineBasicBlock *PredBB) {
661  MPIterator CurMPIter, B;
662  for (CurMPIter = std::prev(MergePotentials.end()),
663      B = MergePotentials.begin();
664       CurMPIter->getHash() == CurHash; --CurMPIter) {
665    // Put the unconditional branch back, if we need one.
666    MachineBasicBlock *CurMBB = CurMPIter->getBlock();
667    if (SuccBB && CurMBB != PredBB)
668      FixTail(CurMBB, SuccBB, TII);
669    if (CurMPIter == B)
670      break;
671  }
672  if (CurMPIter->getHash() != CurHash)
673    CurMPIter++;
674  MergePotentials.erase(CurMPIter, MergePotentials.end());
675}
676
677/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
678/// only of the common tail.  Create a block that does by splitting one.
679bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
680                                             MachineBasicBlock *SuccBB,
681                                             unsigned maxCommonTailLength,
682                                             unsigned &commonTailIndex) {
683  commonTailIndex = 0;
684  unsigned TimeEstimate = ~0U;
685  for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
686    // Use PredBB if possible; that doesn't require a new branch.
687    if (SameTails[i].getBlock() == PredBB) {
688      commonTailIndex = i;
689      break;
690    }
691    // Otherwise, make a (fairly bogus) choice based on estimate of
692    // how long it will take the various blocks to execute.
693    unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
694                                 SameTails[i].getTailStartPos());
695    if (t <= TimeEstimate) {
696      TimeEstimate = t;
697      commonTailIndex = i;
698    }
699  }
700
701  MachineBasicBlock::iterator BBI =
702    SameTails[commonTailIndex].getTailStartPos();
703  MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
704
705  // If the common tail includes any debug info we will take it pretty
706  // randomly from one of the inputs.  Might be better to remove it?
707  DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
708               << maxCommonTailLength);
709
710  // If the split block unconditionally falls-thru to SuccBB, it will be
711  // merged. In control flow terms it should then take SuccBB's name. e.g. If
712  // SuccBB is an inner loop, the common tail is still part of the inner loop.
713  const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ?
714    SuccBB->getBasicBlock() : MBB->getBasicBlock();
715  MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB);
716  if (!newMBB) {
717    DEBUG(dbgs() << "... failed!");
718    return false;
719  }
720
721  SameTails[commonTailIndex].setBlock(newMBB);
722  SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
723
724  // If we split PredBB, newMBB is the new predecessor.
725  if (PredBB == MBB)
726    PredBB = newMBB;
727
728  return true;
729}
730
731static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) {
732  auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end();
733  auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end();
734  if ((E1 - I1) != (E2 - I2))
735    return false;
736  for (; I1 != E1; ++I1, ++I2) {
737    if (**I1 != **I2)
738      return false;
739  }
740  return true;
741}
742
743static void
744removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
745                               MachineBasicBlock &MBBCommon) {
746  // Remove MMOs from memory operations in the common block
747  // when they do not match the ones from the block being tail-merged.
748  // This ensures later passes conservatively compute dependencies.
749  MachineBasicBlock *MBB = MBBIStartPos->getParent();
750  // Note CommonTailLen does not necessarily matches the size of
751  // the common BB nor all its instructions because of debug
752  // instructions differences.
753  unsigned CommonTailLen = 0;
754  for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos)
755    ++CommonTailLen;
756
757  MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin();
758  MachineBasicBlock::reverse_iterator MBBIE = MBB->rend();
759  MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin();
760  MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend();
761
762  while (CommonTailLen--) {
763    assert(MBBI != MBBIE && "Reached BB end within common tail length!");
764    (void)MBBIE;
765
766    if (MBBI->isDebugValue()) {
767      ++MBBI;
768      continue;
769    }
770
771    while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue())
772      ++MBBICommon;
773
774    assert(MBBICommon != MBBIECommon &&
775           "Reached BB end within common tail length!");
776    assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!");
777
778    if (MBBICommon->mayLoad() || MBBICommon->mayStore())
779      if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon))
780        MBBICommon->clearMemRefs();
781
782    ++MBBI;
783    ++MBBICommon;
784  }
785}
786
787// See if any of the blocks in MergePotentials (which all have a common single
788// successor, or all have no successor) can be tail-merged.  If there is a
789// successor, any blocks in MergePotentials that are not tail-merged and
790// are not immediately before Succ must have an unconditional branch to
791// Succ added (but the predecessor/successor lists need no adjustment).
792// The lone predecessor of Succ that falls through into Succ,
793// if any, is given in PredBB.
794
795bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
796                                      MachineBasicBlock *PredBB) {
797  bool MadeChange = false;
798
799  // Except for the special cases below, tail-merge if there are at least
800  // this many instructions in common.
801  unsigned minCommonTailLength = TailMergeSize;
802
803  DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
804        for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
805          dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
806                 << (i == e-1 ? "" : ", ");
807        dbgs() << "\n";
808        if (SuccBB) {
809          dbgs() << "  with successor BB#" << SuccBB->getNumber() << '\n';
810          if (PredBB)
811            dbgs() << "  which has fall-through from BB#"
812                   << PredBB->getNumber() << "\n";
813        }
814        dbgs() << "Looking for common tails of at least "
815               << minCommonTailLength << " instruction"
816               << (minCommonTailLength == 1 ? "" : "s") << '\n';
817       );
818
819  // Sort by hash value so that blocks with identical end sequences sort
820  // together.
821  array_pod_sort(MergePotentials.begin(), MergePotentials.end());
822
823  // Walk through equivalence sets looking for actual exact matches.
824  while (MergePotentials.size() > 1) {
825    unsigned CurHash = MergePotentials.back().getHash();
826
827    // Build SameTails, identifying the set of blocks with this hash code
828    // and with the maximum number of instructions in common.
829    unsigned maxCommonTailLength = ComputeSameTails(CurHash,
830                                                    minCommonTailLength,
831                                                    SuccBB, PredBB);
832
833    // If we didn't find any pair that has at least minCommonTailLength
834    // instructions in common, remove all blocks with this hash code and retry.
835    if (SameTails.empty()) {
836      RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
837      continue;
838    }
839
840    // If one of the blocks is the entire common tail (and not the entry
841    // block, which we can't jump to), we can treat all blocks with this same
842    // tail at once.  Use PredBB if that is one of the possibilities, as that
843    // will not introduce any extra branches.
844    MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()->
845                                 getParent()->begin();
846    unsigned commonTailIndex = SameTails.size();
847    // If there are two blocks, check to see if one can be made to fall through
848    // into the other.
849    if (SameTails.size() == 2 &&
850        SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
851        SameTails[1].tailIsWholeBlock())
852      commonTailIndex = 1;
853    else if (SameTails.size() == 2 &&
854             SameTails[1].getBlock()->isLayoutSuccessor(
855                                                     SameTails[0].getBlock()) &&
856             SameTails[0].tailIsWholeBlock())
857      commonTailIndex = 0;
858    else {
859      // Otherwise just pick one, favoring the fall-through predecessor if
860      // there is one.
861      for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
862        MachineBasicBlock *MBB = SameTails[i].getBlock();
863        if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
864          continue;
865        if (MBB == PredBB) {
866          commonTailIndex = i;
867          break;
868        }
869        if (SameTails[i].tailIsWholeBlock())
870          commonTailIndex = i;
871      }
872    }
873
874    if (commonTailIndex == SameTails.size() ||
875        (SameTails[commonTailIndex].getBlock() == PredBB &&
876         !SameTails[commonTailIndex].tailIsWholeBlock())) {
877      // None of the blocks consist entirely of the common tail.
878      // Split a block so that one does.
879      if (!CreateCommonTailOnlyBlock(PredBB, SuccBB,
880                                     maxCommonTailLength, commonTailIndex)) {
881        RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
882        continue;
883      }
884    }
885
886    MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
887
888    // Recompute commont tail MBB's edge weights and block frequency.
889    setCommonTailEdgeWeights(*MBB);
890
891    // MBB is common tail.  Adjust all other BB's to jump to this one.
892    // Traversal must be forwards so erases work.
893    DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
894                 << " for ");
895    for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
896      if (commonTailIndex == i)
897        continue;
898      DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
899                   << (i == e-1 ? "" : ", "));
900      // Remove MMOs from memory operations as needed.
901      removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB);
902      // Hack the end off BB i, making it jump to BB commonTailIndex instead.
903      ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
904      // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
905      MergePotentials.erase(SameTails[i].getMPIter());
906    }
907    DEBUG(dbgs() << "\n");
908    // We leave commonTailIndex in the worklist in case there are other blocks
909    // that match it with a smaller number of instructions.
910    MadeChange = true;
911  }
912  return MadeChange;
913}
914
915bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
916  bool MadeChange = false;
917  if (!EnableTailMerge) return MadeChange;
918
919  // First find blocks with no successors.
920  MergePotentials.clear();
921  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
922       I != E && MergePotentials.size() < TailMergeThreshold; ++I) {
923    if (TriedMerging.count(I))
924      continue;
925    if (I->succ_empty())
926      MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
927  }
928
929  // If this is a large problem, avoid visiting the same basic blocks
930  // multiple times.
931  if (MergePotentials.size() == TailMergeThreshold)
932    for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
933      TriedMerging.insert(MergePotentials[i].getBlock());
934
935  // See if we can do any tail merging on those.
936  if (MergePotentials.size() >= 2)
937    MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
938
939  // Look at blocks (IBB) with multiple predecessors (PBB).
940  // We change each predecessor to a canonical form, by
941  // (1) temporarily removing any unconditional branch from the predecessor
942  // to IBB, and
943  // (2) alter conditional branches so they branch to the other block
944  // not IBB; this may require adding back an unconditional branch to IBB
945  // later, where there wasn't one coming in.  E.g.
946  //   Bcc IBB
947  //   fallthrough to QBB
948  // here becomes
949  //   Bncc QBB
950  // with a conceptual B to IBB after that, which never actually exists.
951  // With those changes, we see whether the predecessors' tails match,
952  // and merge them if so.  We change things out of canonical form and
953  // back to the way they were later in the process.  (OptimizeBranches
954  // would undo some of this, but we can't use it, because we'd get into
955  // a compile-time infinite loop repeatedly doing and undoing the same
956  // transformations.)
957
958  for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
959       I != E; ++I) {
960    if (I->pred_size() < 2) continue;
961    SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
962    MachineBasicBlock *IBB = I;
963    MachineBasicBlock *PredBB = std::prev(I);
964    MergePotentials.clear();
965    for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
966           E2 = I->pred_end();
967         P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
968      MachineBasicBlock *PBB = *P;
969      if (TriedMerging.count(PBB))
970        continue;
971
972      // Skip blocks that loop to themselves, can't tail merge these.
973      if (PBB == IBB)
974        continue;
975
976      // Visit each predecessor only once.
977      if (!UniquePreds.insert(PBB).second)
978        continue;
979
980      // Skip blocks which may jump to a landing pad. Can't tail merge these.
981      if (PBB->getLandingPadSuccessor())
982        continue;
983
984      MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
985      SmallVector<MachineOperand, 4> Cond;
986      if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
987        // Failing case: IBB is the target of a cbr, and we cannot reverse the
988        // branch.
989        SmallVector<MachineOperand, 4> NewCond(Cond);
990        if (!Cond.empty() && TBB == IBB) {
991          if (TII->ReverseBranchCondition(NewCond))
992            continue;
993          // This is the QBB case described above
994          if (!FBB)
995            FBB = std::next(MachineFunction::iterator(PBB));
996        }
997
998        // Failing case: the only way IBB can be reached from PBB is via
999        // exception handling.  Happens for landing pads.  Would be nice to have
1000        // a bit in the edge so we didn't have to do all this.
1001        if (IBB->isLandingPad()) {
1002          MachineFunction::iterator IP = PBB;  IP++;
1003          MachineBasicBlock *PredNextBB = nullptr;
1004          if (IP != MF.end())
1005            PredNextBB = IP;
1006          if (!TBB) {
1007            if (IBB != PredNextBB)      // fallthrough
1008              continue;
1009          } else if (FBB) {
1010            if (TBB != IBB && FBB != IBB)   // cbr then ubr
1011              continue;
1012          } else if (Cond.empty()) {
1013            if (TBB != IBB)               // ubr
1014              continue;
1015          } else {
1016            if (TBB != IBB && IBB != PredNextBB)  // cbr
1017              continue;
1018          }
1019        }
1020
1021        // Remove the unconditional branch at the end, if any.
1022        if (TBB && (Cond.empty() || FBB)) {
1023          DebugLoc dl;  // FIXME: this is nowhere
1024          TII->RemoveBranch(*PBB);
1025          if (!Cond.empty())
1026            // reinsert conditional branch only, for now
1027            TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr,
1028                              NewCond, dl);
1029        }
1030
1031        MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
1032      }
1033    }
1034
1035    // If this is a large problem, avoid visiting the same basic blocks multiple
1036    // times.
1037    if (MergePotentials.size() == TailMergeThreshold)
1038      for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
1039        TriedMerging.insert(MergePotentials[i].getBlock());
1040
1041    if (MergePotentials.size() >= 2)
1042      MadeChange |= TryTailMergeBlocks(IBB, PredBB);
1043
1044    // Reinsert an unconditional branch if needed. The 1 below can occur as a
1045    // result of removing blocks in TryTailMergeBlocks.
1046    PredBB = std::prev(I);     // this may have been changed in TryTailMergeBlocks
1047    if (MergePotentials.size() == 1 &&
1048        MergePotentials.begin()->getBlock() != PredBB)
1049      FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
1050  }
1051
1052  return MadeChange;
1053}
1054
1055void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) {
1056  SmallVector<BlockFrequency, 2> EdgeFreqLs(TailMBB.succ_size());
1057  BlockFrequency AccumulatedMBBFreq;
1058
1059  // Aggregate edge frequency of successor edge j:
1060  //  edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)),
1061  //  where bb is a basic block that is in SameTails.
1062  for (const auto &Src : SameTails) {
1063    const MachineBasicBlock *SrcMBB = Src.getBlock();
1064    BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB);
1065    AccumulatedMBBFreq += BlockFreq;
1066
1067    // It is not necessary to recompute edge weights if TailBB has less than two
1068    // successors.
1069    if (TailMBB.succ_size() <= 1)
1070      continue;
1071
1072    auto EdgeFreq = EdgeFreqLs.begin();
1073
1074    for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
1075         SuccI != SuccE; ++SuccI, ++EdgeFreq)
1076      *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI);
1077  }
1078
1079  MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq);
1080
1081  if (TailMBB.succ_size() <= 1)
1082    return;
1083
1084  auto MaxEdgeFreq = *std::max_element(EdgeFreqLs.begin(), EdgeFreqLs.end());
1085  uint64_t Scale = MaxEdgeFreq.getFrequency() / UINT32_MAX + 1;
1086  auto EdgeFreq = EdgeFreqLs.begin();
1087
1088  for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
1089       SuccI != SuccE; ++SuccI, ++EdgeFreq)
1090    TailMBB.setSuccWeight(SuccI, EdgeFreq->getFrequency() / Scale);
1091}
1092
1093//===----------------------------------------------------------------------===//
1094//  Branch Optimization
1095//===----------------------------------------------------------------------===//
1096
1097bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
1098  bool MadeChange = false;
1099
1100  // Make sure blocks are numbered in order
1101  MF.RenumberBlocks();
1102
1103  for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
1104       I != E; ) {
1105    MachineBasicBlock *MBB = I++;
1106    MadeChange |= OptimizeBlock(MBB);
1107
1108    // If it is dead, remove it.
1109    if (MBB->pred_empty()) {
1110      RemoveDeadBlock(MBB);
1111      MadeChange = true;
1112      ++NumDeadBlocks;
1113    }
1114  }
1115  return MadeChange;
1116}
1117
1118// Blocks should be considered empty if they contain only debug info;
1119// else the debug info would affect codegen.
1120static bool IsEmptyBlock(MachineBasicBlock *MBB) {
1121  if (MBB->empty())
1122    return true;
1123  for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
1124       MBBI!=MBBE; ++MBBI) {
1125    if (!MBBI->isDebugValue())
1126      return false;
1127  }
1128  return true;
1129}
1130
1131// Blocks with only debug info and branches should be considered the same
1132// as blocks with only branches.
1133static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
1134  MachineBasicBlock::iterator MBBI, MBBE;
1135  for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI!=MBBE; ++MBBI) {
1136    if (!MBBI->isDebugValue())
1137      break;
1138  }
1139  return (MBBI->isBranch());
1140}
1141
1142/// IsBetterFallthrough - Return true if it would be clearly better to
1143/// fall-through to MBB1 than to fall through into MBB2.  This has to return
1144/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
1145/// result in infinite loops.
1146static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
1147                                MachineBasicBlock *MBB2) {
1148  // Right now, we use a simple heuristic.  If MBB2 ends with a call, and
1149  // MBB1 doesn't, we prefer to fall through into MBB1.  This allows us to
1150  // optimize branches that branch to either a return block or an assert block
1151  // into a fallthrough to the return.
1152  if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false;
1153
1154  // If there is a clear successor ordering we make sure that one block
1155  // will fall through to the next
1156  if (MBB1->isSuccessor(MBB2)) return true;
1157  if (MBB2->isSuccessor(MBB1)) return false;
1158
1159  // Neither block consists entirely of debug info (per IsEmptyBlock check),
1160  // so we needn't test for falling off the beginning here.
1161  MachineBasicBlock::iterator MBB1I = --MBB1->end();
1162  while (MBB1I->isDebugValue())
1163    --MBB1I;
1164  MachineBasicBlock::iterator MBB2I = --MBB2->end();
1165  while (MBB2I->isDebugValue())
1166    --MBB2I;
1167  return MBB2I->isCall() && !MBB1I->isCall();
1168}
1169
1170/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
1171/// instructions on the block. Always use the DebugLoc of the first
1172/// branching instruction found unless its absent, in which case use the
1173/// DebugLoc of the second if present.
1174static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
1175  MachineBasicBlock::iterator I = MBB.end();
1176  if (I == MBB.begin())
1177    return DebugLoc();
1178  --I;
1179  while (I->isDebugValue() && I != MBB.begin())
1180    --I;
1181  if (I->isBranch())
1182    return I->getDebugLoc();
1183  return DebugLoc();
1184}
1185
1186/// OptimizeBlock - Analyze and optimize control flow related to the specified
1187/// block.  This is never called on the entry block.
1188bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
1189  bool MadeChange = false;
1190  MachineFunction &MF = *MBB->getParent();
1191ReoptimizeBlock:
1192
1193  MachineFunction::iterator FallThrough = MBB;
1194  ++FallThrough;
1195
1196  // If this block is empty, make everyone use its fall-through, not the block
1197  // explicitly.  Landing pads should not do this since the landing-pad table
1198  // points to this block.  Blocks with their addresses taken shouldn't be
1199  // optimized away.
1200  if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
1201    // Dead block?  Leave for cleanup later.
1202    if (MBB->pred_empty()) return MadeChange;
1203
1204    if (FallThrough == MF.end()) {
1205      // TODO: Simplify preds to not branch here if possible!
1206    } else {
1207      // Rewrite all predecessors of the old block to go to the fallthrough
1208      // instead.
1209      while (!MBB->pred_empty()) {
1210        MachineBasicBlock *Pred = *(MBB->pred_end()-1);
1211        Pred->ReplaceUsesOfBlockWith(MBB, FallThrough);
1212      }
1213      // If MBB was the target of a jump table, update jump tables to go to the
1214      // fallthrough instead.
1215      if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
1216        MJTI->ReplaceMBBInJumpTables(MBB, FallThrough);
1217      MadeChange = true;
1218    }
1219    return MadeChange;
1220  }
1221
1222  // Check to see if we can simplify the terminator of the block before this
1223  // one.
1224  MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB));
1225
1226  MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
1227  SmallVector<MachineOperand, 4> PriorCond;
1228  bool PriorUnAnalyzable =
1229    TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
1230  if (!PriorUnAnalyzable) {
1231    // If the CFG for the prior block has extra edges, remove them.
1232    MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
1233                                              !PriorCond.empty());
1234
1235    // If the previous branch is conditional and both conditions go to the same
1236    // destination, remove the branch, replacing it with an unconditional one or
1237    // a fall-through.
1238    if (PriorTBB && PriorTBB == PriorFBB) {
1239      DebugLoc dl = getBranchDebugLoc(PrevBB);
1240      TII->RemoveBranch(PrevBB);
1241      PriorCond.clear();
1242      if (PriorTBB != MBB)
1243        TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
1244      MadeChange = true;
1245      ++NumBranchOpts;
1246      goto ReoptimizeBlock;
1247    }
1248
1249    // If the previous block unconditionally falls through to this block and
1250    // this block has no other predecessors, move the contents of this block
1251    // into the prior block. This doesn't usually happen when SimplifyCFG
1252    // has been used, but it can happen if tail merging splits a fall-through
1253    // predecessor of a block.
1254    // This has to check PrevBB->succ_size() because EH edges are ignored by
1255    // AnalyzeBranch.
1256    if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
1257        PrevBB.succ_size() == 1 &&
1258        !MBB->hasAddressTaken() && !MBB->isLandingPad()) {
1259      DEBUG(dbgs() << "\nMerging into block: " << PrevBB
1260                   << "From MBB: " << *MBB);
1261      // Remove redundant DBG_VALUEs first.
1262      if (PrevBB.begin() != PrevBB.end()) {
1263        MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
1264        --PrevBBIter;
1265        MachineBasicBlock::iterator MBBIter = MBB->begin();
1266        // Check if DBG_VALUE at the end of PrevBB is identical to the
1267        // DBG_VALUE at the beginning of MBB.
1268        while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
1269               && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
1270          if (!MBBIter->isIdenticalTo(PrevBBIter))
1271            break;
1272          MachineInstr *DuplicateDbg = MBBIter;
1273          ++MBBIter; -- PrevBBIter;
1274          DuplicateDbg->eraseFromParent();
1275        }
1276      }
1277      PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
1278      PrevBB.removeSuccessor(PrevBB.succ_begin());
1279      assert(PrevBB.succ_empty());
1280      PrevBB.transferSuccessors(MBB);
1281      MadeChange = true;
1282      return MadeChange;
1283    }
1284
1285    // If the previous branch *only* branches to *this* block (conditional or
1286    // not) remove the branch.
1287    if (PriorTBB == MBB && !PriorFBB) {
1288      TII->RemoveBranch(PrevBB);
1289      MadeChange = true;
1290      ++NumBranchOpts;
1291      goto ReoptimizeBlock;
1292    }
1293
1294    // If the prior block branches somewhere else on the condition and here if
1295    // the condition is false, remove the uncond second branch.
1296    if (PriorFBB == MBB) {
1297      DebugLoc dl = getBranchDebugLoc(PrevBB);
1298      TII->RemoveBranch(PrevBB);
1299      TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
1300      MadeChange = true;
1301      ++NumBranchOpts;
1302      goto ReoptimizeBlock;
1303    }
1304
1305    // If the prior block branches here on true and somewhere else on false, and
1306    // if the branch condition is reversible, reverse the branch to create a
1307    // fall-through.
1308    if (PriorTBB == MBB) {
1309      SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
1310      if (!TII->ReverseBranchCondition(NewPriorCond)) {
1311        DebugLoc dl = getBranchDebugLoc(PrevBB);
1312        TII->RemoveBranch(PrevBB);
1313        TII->InsertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl);
1314        MadeChange = true;
1315        ++NumBranchOpts;
1316        goto ReoptimizeBlock;
1317      }
1318    }
1319
1320    // If this block has no successors (e.g. it is a return block or ends with
1321    // a call to a no-return function like abort or __cxa_throw) and if the pred
1322    // falls through into this block, and if it would otherwise fall through
1323    // into the block after this, move this block to the end of the function.
1324    //
1325    // We consider it more likely that execution will stay in the function (e.g.
1326    // due to loops) than it is to exit it.  This asserts in loops etc, moving
1327    // the assert condition out of the loop body.
1328    if (MBB->succ_empty() && !PriorCond.empty() && !PriorFBB &&
1329        MachineFunction::iterator(PriorTBB) == FallThrough &&
1330        !MBB->canFallThrough()) {
1331      bool DoTransform = true;
1332
1333      // We have to be careful that the succs of PredBB aren't both no-successor
1334      // blocks.  If neither have successors and if PredBB is the second from
1335      // last block in the function, we'd just keep swapping the two blocks for
1336      // last.  Only do the swap if one is clearly better to fall through than
1337      // the other.
1338      if (FallThrough == --MF.end() &&
1339          !IsBetterFallthrough(PriorTBB, MBB))
1340        DoTransform = false;
1341
1342      if (DoTransform) {
1343        // Reverse the branch so we will fall through on the previous true cond.
1344        SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
1345        if (!TII->ReverseBranchCondition(NewPriorCond)) {
1346          DEBUG(dbgs() << "\nMoving MBB: " << *MBB
1347                       << "To make fallthrough to: " << *PriorTBB << "\n");
1348
1349          DebugLoc dl = getBranchDebugLoc(PrevBB);
1350          TII->RemoveBranch(PrevBB);
1351          TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
1352
1353          // Move this block to the end of the function.
1354          MBB->moveAfter(--MF.end());
1355          MadeChange = true;
1356          ++NumBranchOpts;
1357          return MadeChange;
1358        }
1359      }
1360    }
1361  }
1362
1363  // Analyze the branch in the current block.
1364  MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
1365  SmallVector<MachineOperand, 4> CurCond;
1366  bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
1367  if (!CurUnAnalyzable) {
1368    // If the CFG for the prior block has extra edges, remove them.
1369    MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
1370
1371    // If this is a two-way branch, and the FBB branches to this block, reverse
1372    // the condition so the single-basic-block loop is faster.  Instead of:
1373    //    Loop: xxx; jcc Out; jmp Loop
1374    // we want:
1375    //    Loop: xxx; jncc Loop; jmp Out
1376    if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
1377      SmallVector<MachineOperand, 4> NewCond(CurCond);
1378      if (!TII->ReverseBranchCondition(NewCond)) {
1379        DebugLoc dl = getBranchDebugLoc(*MBB);
1380        TII->RemoveBranch(*MBB);
1381        TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
1382        MadeChange = true;
1383        ++NumBranchOpts;
1384        goto ReoptimizeBlock;
1385      }
1386    }
1387
1388    // If this branch is the only thing in its block, see if we can forward
1389    // other blocks across it.
1390    if (CurTBB && CurCond.empty() && !CurFBB &&
1391        IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
1392        !MBB->hasAddressTaken()) {
1393      DebugLoc dl = getBranchDebugLoc(*MBB);
1394      // This block may contain just an unconditional branch.  Because there can
1395      // be 'non-branch terminators' in the block, try removing the branch and
1396      // then seeing if the block is empty.
1397      TII->RemoveBranch(*MBB);
1398      // If the only things remaining in the block are debug info, remove these
1399      // as well, so this will behave the same as an empty block in non-debug
1400      // mode.
1401      if (!MBB->empty()) {
1402        bool NonDebugInfoFound = false;
1403        for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
1404             I != E; ++I) {
1405          if (!I->isDebugValue()) {
1406            NonDebugInfoFound = true;
1407            break;
1408          }
1409        }
1410        if (!NonDebugInfoFound)
1411          // Make the block empty, losing the debug info (we could probably
1412          // improve this in some cases.)
1413          MBB->erase(MBB->begin(), MBB->end());
1414      }
1415      // If this block is just an unconditional branch to CurTBB, we can
1416      // usually completely eliminate the block.  The only case we cannot
1417      // completely eliminate the block is when the block before this one
1418      // falls through into MBB and we can't understand the prior block's branch
1419      // condition.
1420      if (MBB->empty()) {
1421        bool PredHasNoFallThrough = !PrevBB.canFallThrough();
1422        if (PredHasNoFallThrough || !PriorUnAnalyzable ||
1423            !PrevBB.isSuccessor(MBB)) {
1424          // If the prior block falls through into us, turn it into an
1425          // explicit branch to us to make updates simpler.
1426          if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
1427              PriorTBB != MBB && PriorFBB != MBB) {
1428            if (!PriorTBB) {
1429              assert(PriorCond.empty() && !PriorFBB &&
1430                     "Bad branch analysis");
1431              PriorTBB = MBB;
1432            } else {
1433              assert(!PriorFBB && "Machine CFG out of date!");
1434              PriorFBB = MBB;
1435            }
1436            DebugLoc pdl = getBranchDebugLoc(PrevBB);
1437            TII->RemoveBranch(PrevBB);
1438            TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
1439          }
1440
1441          // Iterate through all the predecessors, revectoring each in-turn.
1442          size_t PI = 0;
1443          bool DidChange = false;
1444          bool HasBranchToSelf = false;
1445          while(PI != MBB->pred_size()) {
1446            MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
1447            if (PMBB == MBB) {
1448              // If this block has an uncond branch to itself, leave it.
1449              ++PI;
1450              HasBranchToSelf = true;
1451            } else {
1452              DidChange = true;
1453              PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
1454              // If this change resulted in PMBB ending in a conditional
1455              // branch where both conditions go to the same destination,
1456              // change this to an unconditional branch (and fix the CFG).
1457              MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr;
1458              SmallVector<MachineOperand, 4> NewCurCond;
1459              bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
1460                      NewCurFBB, NewCurCond, true);
1461              if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
1462                DebugLoc pdl = getBranchDebugLoc(*PMBB);
1463                TII->RemoveBranch(*PMBB);
1464                NewCurCond.clear();
1465                TII->InsertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl);
1466                MadeChange = true;
1467                ++NumBranchOpts;
1468                PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false);
1469              }
1470            }
1471          }
1472
1473          // Change any jumptables to go to the new MBB.
1474          if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
1475            MJTI->ReplaceMBBInJumpTables(MBB, CurTBB);
1476          if (DidChange) {
1477            ++NumBranchOpts;
1478            MadeChange = true;
1479            if (!HasBranchToSelf) return MadeChange;
1480          }
1481        }
1482      }
1483
1484      // Add the branch back if the block is more than just an uncond branch.
1485      TII->InsertBranch(*MBB, CurTBB, nullptr, CurCond, dl);
1486    }
1487  }
1488
1489  // If the prior block doesn't fall through into this block, and if this
1490  // block doesn't fall through into some other block, see if we can find a
1491  // place to move this block where a fall-through will happen.
1492  if (!PrevBB.canFallThrough()) {
1493
1494    // Now we know that there was no fall-through into this block, check to
1495    // see if it has a fall-through into its successor.
1496    bool CurFallsThru = MBB->canFallThrough();
1497
1498    if (!MBB->isLandingPad()) {
1499      // Check all the predecessors of this block.  If one of them has no fall
1500      // throughs, move this block right after it.
1501      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
1502           E = MBB->pred_end(); PI != E; ++PI) {
1503        // Analyze the branch at the end of the pred.
1504        MachineBasicBlock *PredBB = *PI;
1505        MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
1506        MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
1507        SmallVector<MachineOperand, 4> PredCond;
1508        if (PredBB != MBB && !PredBB->canFallThrough() &&
1509            !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
1510            && (!CurFallsThru || !CurTBB || !CurFBB)
1511            && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
1512          // If the current block doesn't fall through, just move it.
1513          // If the current block can fall through and does not end with a
1514          // conditional branch, we need to append an unconditional jump to
1515          // the (current) next block.  To avoid a possible compile-time
1516          // infinite loop, move blocks only backward in this case.
1517          // Also, if there are already 2 branches here, we cannot add a third;
1518          // this means we have the case
1519          // Bcc next
1520          // B elsewhere
1521          // next:
1522          if (CurFallsThru) {
1523            MachineBasicBlock *NextBB =
1524                std::next(MachineFunction::iterator(MBB));
1525            CurCond.clear();
1526            TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());
1527          }
1528          MBB->moveAfter(PredBB);
1529          MadeChange = true;
1530          goto ReoptimizeBlock;
1531        }
1532      }
1533    }
1534
1535    if (!CurFallsThru) {
1536      // Check all successors to see if we can move this block before it.
1537      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
1538           E = MBB->succ_end(); SI != E; ++SI) {
1539        // Analyze the branch at the end of the block before the succ.
1540        MachineBasicBlock *SuccBB = *SI;
1541        MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
1542
1543        // If this block doesn't already fall-through to that successor, and if
1544        // the succ doesn't already have a block that can fall through into it,
1545        // and if the successor isn't an EH destination, we can arrange for the
1546        // fallthrough to happen.
1547        if (SuccBB != MBB && &*SuccPrev != MBB &&
1548            !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
1549            !SuccBB->isLandingPad()) {
1550          MBB->moveBefore(SuccBB);
1551          MadeChange = true;
1552          goto ReoptimizeBlock;
1553        }
1554      }
1555
1556      // Okay, there is no really great place to put this block.  If, however,
1557      // the block before this one would be a fall-through if this block were
1558      // removed, move this block to the end of the function.
1559      MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
1560      SmallVector<MachineOperand, 4> PrevCond;
1561      if (FallThrough != MF.end() &&
1562          !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
1563          PrevBB.isSuccessor(FallThrough)) {
1564        MBB->moveAfter(--MF.end());
1565        MadeChange = true;
1566        return MadeChange;
1567      }
1568    }
1569  }
1570
1571  return MadeChange;
1572}
1573
1574//===----------------------------------------------------------------------===//
1575//  Hoist Common Code
1576//===----------------------------------------------------------------------===//
1577
1578/// HoistCommonCode - Hoist common instruction sequences at the start of basic
1579/// blocks to their common predecessor.
1580bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
1581  bool MadeChange = false;
1582  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
1583    MachineBasicBlock *MBB = I++;
1584    MadeChange |= HoistCommonCodeInSuccs(MBB);
1585  }
1586
1587  return MadeChange;
1588}
1589
1590/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
1591/// its 'true' successor.
1592static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
1593                                         MachineBasicBlock *TrueBB) {
1594  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
1595         E = BB->succ_end(); SI != E; ++SI) {
1596    MachineBasicBlock *SuccBB = *SI;
1597    if (SuccBB != TrueBB)
1598      return SuccBB;
1599  }
1600  return nullptr;
1601}
1602
1603/// findHoistingInsertPosAndDeps - Find the location to move common instructions
1604/// in successors to. The location is usually just before the terminator,
1605/// however if the terminator is a conditional branch and its previous
1606/// instruction is the flag setting instruction, the previous instruction is
1607/// the preferred location. This function also gathers uses and defs of the
1608/// instructions from the insertion point to the end of the block. The data is
1609/// used by HoistCommonCodeInSuccs to ensure safety.
1610static
1611MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
1612                                                  const TargetInstrInfo *TII,
1613                                                  const TargetRegisterInfo *TRI,
1614                                                  SmallSet<unsigned,4> &Uses,
1615                                                  SmallSet<unsigned,4> &Defs) {
1616  MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
1617  if (!TII->isUnpredicatedTerminator(Loc))
1618    return MBB->end();
1619
1620  for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) {
1621    const MachineOperand &MO = Loc->getOperand(i);
1622    if (!MO.isReg())
1623      continue;
1624    unsigned Reg = MO.getReg();
1625    if (!Reg)
1626      continue;
1627    if (MO.isUse()) {
1628      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
1629        Uses.insert(*AI);
1630    } else {
1631      if (!MO.isDead())
1632        // Don't try to hoist code in the rare case the terminator defines a
1633        // register that is later used.
1634        return MBB->end();
1635
1636      // If the terminator defines a register, make sure we don't hoist
1637      // the instruction whose def might be clobbered by the terminator.
1638      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
1639        Defs.insert(*AI);
1640    }
1641  }
1642
1643  if (Uses.empty())
1644    return Loc;
1645  if (Loc == MBB->begin())
1646    return MBB->end();
1647
1648  // The terminator is probably a conditional branch, try not to separate the
1649  // branch from condition setting instruction.
1650  MachineBasicBlock::iterator PI = Loc;
1651  --PI;
1652  while (PI != MBB->begin() && PI->isDebugValue())
1653    --PI;
1654
1655  bool IsDef = false;
1656  for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
1657    const MachineOperand &MO = PI->getOperand(i);
1658    // If PI has a regmask operand, it is probably a call. Separate away.
1659    if (MO.isRegMask())
1660      return Loc;
1661    if (!MO.isReg() || MO.isUse())
1662      continue;
1663    unsigned Reg = MO.getReg();
1664    if (!Reg)
1665      continue;
1666    if (Uses.count(Reg))
1667      IsDef = true;
1668  }
1669  if (!IsDef)
1670    // The condition setting instruction is not just before the conditional
1671    // branch.
1672    return Loc;
1673
1674  // Be conservative, don't insert instruction above something that may have
1675  // side-effects. And since it's potentially bad to separate flag setting
1676  // instruction from the conditional branch, just abort the optimization
1677  // completely.
1678  // Also avoid moving code above predicated instruction since it's hard to
1679  // reason about register liveness with predicated instruction.
1680  bool DontMoveAcrossStore = true;
1681  if (!PI->isSafeToMove(TII, nullptr, DontMoveAcrossStore) ||
1682      TII->isPredicated(PI))
1683    return MBB->end();
1684
1685
1686  // Find out what registers are live. Note this routine is ignoring other live
1687  // registers which are only used by instructions in successor blocks.
1688  for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) {
1689    const MachineOperand &MO = PI->getOperand(i);
1690    if (!MO.isReg())
1691      continue;
1692    unsigned Reg = MO.getReg();
1693    if (!Reg)
1694      continue;
1695    if (MO.isUse()) {
1696      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
1697        Uses.insert(*AI);
1698    } else {
1699      if (Uses.erase(Reg)) {
1700        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
1701          Uses.erase(*SubRegs); // Use sub-registers to be conservative
1702      }
1703      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
1704        Defs.insert(*AI);
1705    }
1706  }
1707
1708  return PI;
1709}
1710
1711/// HoistCommonCodeInSuccs - If the successors of MBB has common instruction
1712/// sequence at the start of the function, move the instructions before MBB
1713/// terminator if it's legal.
1714bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
1715  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1716  SmallVector<MachineOperand, 4> Cond;
1717  if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
1718    return false;
1719
1720  if (!FBB) FBB = findFalseBlock(MBB, TBB);
1721  if (!FBB)
1722    // Malformed bcc? True and false blocks are the same?
1723    return false;
1724
1725  // Restrict the optimization to cases where MBB is the only predecessor,
1726  // it is an obvious win.
1727  if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
1728    return false;
1729
1730  // Find a suitable position to hoist the common instructions to. Also figure
1731  // out which registers are used or defined by instructions from the insertion
1732  // point to the end of the block.
1733  SmallSet<unsigned, 4> Uses, Defs;
1734  MachineBasicBlock::iterator Loc =
1735    findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
1736  if (Loc == MBB->end())
1737    return false;
1738
1739  bool HasDups = false;
1740  SmallVector<unsigned, 4> LocalDefs;
1741  SmallSet<unsigned, 4> LocalDefsSet;
1742  MachineBasicBlock::iterator TIB = TBB->begin();
1743  MachineBasicBlock::iterator FIB = FBB->begin();
1744  MachineBasicBlock::iterator TIE = TBB->end();
1745  MachineBasicBlock::iterator FIE = FBB->end();
1746  while (TIB != TIE && FIB != FIE) {
1747    // Skip dbg_value instructions. These do not count.
1748    if (TIB->isDebugValue()) {
1749      while (TIB != TIE && TIB->isDebugValue())
1750        ++TIB;
1751      if (TIB == TIE)
1752        break;
1753    }
1754    if (FIB->isDebugValue()) {
1755      while (FIB != FIE && FIB->isDebugValue())
1756        ++FIB;
1757      if (FIB == FIE)
1758        break;
1759    }
1760    if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead))
1761      break;
1762
1763    if (TII->isPredicated(TIB))
1764      // Hard to reason about register liveness with predicated instruction.
1765      break;
1766
1767    bool IsSafe = true;
1768    for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
1769      MachineOperand &MO = TIB->getOperand(i);
1770      // Don't attempt to hoist instructions with register masks.
1771      if (MO.isRegMask()) {
1772        IsSafe = false;
1773        break;
1774      }
1775      if (!MO.isReg())
1776        continue;
1777      unsigned Reg = MO.getReg();
1778      if (!Reg)
1779        continue;
1780      if (MO.isDef()) {
1781        if (Uses.count(Reg)) {
1782          // Avoid clobbering a register that's used by the instruction at
1783          // the point of insertion.
1784          IsSafe = false;
1785          break;
1786        }
1787
1788        if (Defs.count(Reg) && !MO.isDead()) {
1789          // Don't hoist the instruction if the def would be clobber by the
1790          // instruction at the point insertion. FIXME: This is overly
1791          // conservative. It should be possible to hoist the instructions
1792          // in BB2 in the following example:
1793          // BB1:
1794          // r1, eflag = op1 r2, r3
1795          // brcc eflag
1796          //
1797          // BB2:
1798          // r1 = op2, ...
1799          //    = op3, r1<kill>
1800          IsSafe = false;
1801          break;
1802        }
1803      } else if (!LocalDefsSet.count(Reg)) {
1804        if (Defs.count(Reg)) {
1805          // Use is defined by the instruction at the point of insertion.
1806          IsSafe = false;
1807          break;
1808        }
1809
1810        if (MO.isKill() && Uses.count(Reg))
1811          // Kills a register that's read by the instruction at the point of
1812          // insertion. Remove the kill marker.
1813          MO.setIsKill(false);
1814      }
1815    }
1816    if (!IsSafe)
1817      break;
1818
1819    bool DontMoveAcrossStore = true;
1820    if (!TIB->isSafeToMove(TII, nullptr, DontMoveAcrossStore))
1821      break;
1822
1823    // Remove kills from LocalDefsSet, these registers had short live ranges.
1824    for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
1825      MachineOperand &MO = TIB->getOperand(i);
1826      if (!MO.isReg() || !MO.isUse() || !MO.isKill())
1827        continue;
1828      unsigned Reg = MO.getReg();
1829      if (!Reg || !LocalDefsSet.count(Reg))
1830        continue;
1831      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
1832        LocalDefsSet.erase(*AI);
1833    }
1834
1835    // Track local defs so we can update liveins.
1836    for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
1837      MachineOperand &MO = TIB->getOperand(i);
1838      if (!MO.isReg() || !MO.isDef() || MO.isDead())
1839        continue;
1840      unsigned Reg = MO.getReg();
1841      if (!Reg)
1842        continue;
1843      LocalDefs.push_back(Reg);
1844      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
1845        LocalDefsSet.insert(*AI);
1846    }
1847
1848    HasDups = true;
1849    ++TIB;
1850    ++FIB;
1851  }
1852
1853  if (!HasDups)
1854    return false;
1855
1856  MBB->splice(Loc, TBB, TBB->begin(), TIB);
1857  FBB->erase(FBB->begin(), FIB);
1858
1859  // Update livein's.
1860  for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
1861    unsigned Def = LocalDefs[i];
1862    if (LocalDefsSet.count(Def)) {
1863      TBB->addLiveIn(Def);
1864      FBB->addLiveIn(Def);
1865    }
1866  }
1867
1868  ++NumHoist;
1869  return true;
1870}
1871