1//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8/// \file
9//==-----------------------------------------------------------------------===//
10
11#include "AMDGPU.h"
12#include "AMDGPUInstrInfo.h"
13#include "R600InstrInfo.h"
14#include "llvm/ADT/DepthFirstIterator.h"
15#include "llvm/ADT/SCCIterator.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/CodeGen/MachineDominators.h"
19#include "llvm/CodeGen/MachineFunction.h"
20#include "llvm/CodeGen/MachineFunctionAnalysis.h"
21#include "llvm/CodeGen/MachineFunctionPass.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineJumpTableInfo.h"
24#include "llvm/CodeGen/MachineLoopInfo.h"
25#include "llvm/CodeGen/MachinePostDominators.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/raw_ostream.h"
30#include "llvm/Target/TargetInstrInfo.h"
31#include "llvm/Target/TargetMachine.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "structcfg"
36
37#define DEFAULT_VEC_SLOTS 8
38
39// TODO: move-begin.
40
41//===----------------------------------------------------------------------===//
42//
43// Statistics for CFGStructurizer.
44//
45//===----------------------------------------------------------------------===//
46
47STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
48    "matched");
49STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
50    "matched");
51STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
52    "pattern matched");
53STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
54STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
55
56namespace llvm {
57  void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
58}
59
60//===----------------------------------------------------------------------===//
61//
62// Miscellaneous utility for CFGStructurizer.
63//
64//===----------------------------------------------------------------------===//
65namespace {
66#define SHOWNEWINSTR(i) \
67  DEBUG(dbgs() << "New instr: " << *i << "\n");
68
69#define SHOWNEWBLK(b, msg) \
70DEBUG( \
71  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
72  dbgs() << "\n"; \
73);
74
75#define SHOWBLK_DETAIL(b, msg) \
76DEBUG( \
77  if (b) { \
78  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
79  b->print(dbgs()); \
80  dbgs() << "\n"; \
81  } \
82);
83
84#define INVALIDSCCNUM -1
85
86template<class NodeT>
87void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
88  size_t sz = Src.size();
89  for (size_t i = 0; i < sz/2; ++i) {
90    NodeT *t = Src[i];
91    Src[i] = Src[sz - i - 1];
92    Src[sz - i - 1] = t;
93  }
94}
95
96} // end anonymous namespace
97
98//===----------------------------------------------------------------------===//
99//
100// supporting data structure for CFGStructurizer
101//
102//===----------------------------------------------------------------------===//
103
104
105namespace {
106
107class BlockInformation {
108public:
109  bool IsRetired;
110  int  SccNum;
111  BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
112};
113
114} // end anonymous namespace
115
116//===----------------------------------------------------------------------===//
117//
118// CFGStructurizer
119//
120//===----------------------------------------------------------------------===//
121
122namespace {
123class AMDGPUCFGStructurizer : public MachineFunctionPass {
124public:
125  typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
126  typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
127  typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
128
129  enum PathToKind {
130    Not_SinglePath = 0,
131    SinglePath_InPath = 1,
132    SinglePath_NotInPath = 2
133  };
134
135  static char ID;
136
137  AMDGPUCFGStructurizer() :
138      MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
139    initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
140  }
141
142   const char *getPassName() const override {
143    return "AMDGPU Control Flow Graph structurizer Pass";
144  }
145
146  void getAnalysisUsage(AnalysisUsage &AU) const override {
147    AU.addPreserved<MachineFunctionAnalysis>();
148    AU.addRequired<MachineFunctionAnalysis>();
149    AU.addRequired<MachineDominatorTree>();
150    AU.addRequired<MachinePostDominatorTree>();
151    AU.addRequired<MachineLoopInfo>();
152  }
153
154  /// Perform the CFG structurization
155  bool run();
156
157  /// Perform the CFG preparation
158  /// This step will remove every unconditionnal/dead jump instructions and make
159  /// sure all loops have an exit block
160  bool prepare();
161
162  bool runOnMachineFunction(MachineFunction &MF) override {
163    TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
164    TRI = &TII->getRegisterInfo();
165    DEBUG(MF.dump(););
166    OrderedBlks.clear();
167    FuncRep = &MF;
168    MLI = &getAnalysis<MachineLoopInfo>();
169    DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
170    MDT = &getAnalysis<MachineDominatorTree>();
171    DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
172    PDT = &getAnalysis<MachinePostDominatorTree>();
173    DEBUG(PDT->print(dbgs()););
174    prepare();
175    run();
176    DEBUG(MF.dump(););
177    return true;
178  }
179
180protected:
181  MachineDominatorTree *MDT;
182  MachinePostDominatorTree *PDT;
183  MachineLoopInfo *MLI;
184  const R600InstrInfo *TII;
185  const AMDGPURegisterInfo *TRI;
186
187  // PRINT FUNCTIONS
188  /// Print the ordered Blocks.
189  void printOrderedBlocks() const {
190    size_t i = 0;
191    for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
192        iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
193      dbgs() << "BB" << (*iterBlk)->getNumber();
194      dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
195      if (i != 0 && i % 10 == 0) {
196        dbgs() << "\n";
197      } else {
198        dbgs() << " ";
199      }
200    }
201  }
202  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
203    for (MachineLoop::iterator iter = LoopInfo.begin(),
204         iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
205      (*iter)->print(dbgs(), 0);
206    }
207  }
208
209  // UTILITY FUNCTIONS
210  int getSCCNum(MachineBasicBlock *MBB) const;
211  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
212  bool hasBackEdge(MachineBasicBlock *MBB) const;
213  static unsigned getLoopDepth(MachineLoop *LoopRep);
214  bool isRetiredBlock(MachineBasicBlock *MBB) const;
215  bool isActiveLoophead(MachineBasicBlock *MBB) const;
216  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
217      bool AllowSideEntry = true) const;
218  int countActiveBlock(MBBVector::const_iterator It,
219      MBBVector::const_iterator E) const;
220  bool needMigrateBlock(MachineBasicBlock *MBB) const;
221
222  // Utility Functions
223  void reversePredicateSetter(MachineBasicBlock::iterator I);
224  /// Compute the reversed DFS post order of Blocks
225  void orderBlocks(MachineFunction *MF);
226
227  // Function originally from CFGStructTraits
228  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
229      DebugLoc DL = DebugLoc());
230  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
231    DebugLoc DL = DebugLoc());
232  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
233  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
234      DebugLoc DL);
235  void insertCondBranchBefore(MachineBasicBlock *MBB,
236      MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
237      DebugLoc DL);
238  void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum);
239  static int getBranchNzeroOpcode(int OldOpcode);
240  static int getBranchZeroOpcode(int OldOpcode);
241  static int getContinueNzeroOpcode(int OldOpcode);
242  static int getContinueZeroOpcode(int OldOpcode);
243  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
244  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
245  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
246      MachineInstr *MI);
247  static bool isCondBranch(MachineInstr *MI);
248  static bool isUncondBranch(MachineInstr *MI);
249  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
250  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
251  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
252  ///
253  /// BB with backward-edge could have move instructions after the branch
254  /// instruction.  Such move instruction "belong to" the loop backward-edge.
255  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
256  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
257  static MachineInstr *getContinueInstr(MachineBasicBlock *MBB);
258  static bool isReturnBlock(MachineBasicBlock *MBB);
259  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
260      MachineBasicBlock *SrcMBB) ;
261  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
262  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
263  /// because the AMDGPU instruction is not recognized as terminator fix this
264  /// and retire this routine
265  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
266      MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
267  static void wrapup(MachineBasicBlock *MBB);
268
269
270  int patternMatch(MachineBasicBlock *MBB);
271  int patternMatchGroup(MachineBasicBlock *MBB);
272  int serialPatternMatch(MachineBasicBlock *MBB);
273  int ifPatternMatch(MachineBasicBlock *MBB);
274  int loopendPatternMatch();
275  int mergeLoop(MachineLoop *LoopRep);
276  int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
277
278  void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
279      MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
280      MachineLoop *ContLoop);
281  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
282  /// the same loop with LoopLandInfo without explicitly keeping track of
283  /// loopContBlks and loopBreakBlks, this is a method to get the information.
284  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
285      MachineBasicBlock *Src2MBB);
286  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
287      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
288  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
289      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
290  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
291      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
292      MachineBasicBlock **LandMBBPtr);
293  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
294      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
295      MachineBasicBlock *LandMBB, bool Detail = false);
296  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
297      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
298  void mergeSerialBlock(MachineBasicBlock *DstMBB,
299      MachineBasicBlock *SrcMBB);
300
301  void mergeIfthenelseBlock(MachineInstr *BranchMI,
302      MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
303      MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
304  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
305      MachineBasicBlock *LandMBB);
306  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
307      MachineBasicBlock *LandMBB);
308  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
309      MachineBasicBlock *ContMBB);
310  /// normalizeInfiniteLoopExit change
311  ///   B1:
312  ///        uncond_br LoopHeader
313  ///
314  /// to
315  ///   B1:
316  ///        cond_br 1 LoopHeader dummyExit
317  /// and return the newly added dummy exit block
318  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
319  void removeUnconditionalBranch(MachineBasicBlock *MBB);
320  /// Remove duplicate branches instructions in a block.
321  /// For instance
322  /// B0:
323  ///    cond_br X B1 B2
324  ///    cond_br X B1 B2
325  /// is transformed to
326  /// B0:
327  ///    cond_br X B1 B2
328  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
329  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
330  void removeSuccessor(MachineBasicBlock *MBB);
331  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
332      MachineBasicBlock *PredMBB);
333  void migrateInstruction(MachineBasicBlock *SrcMBB,
334      MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
335  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
336  void retireBlock(MachineBasicBlock *MBB);
337  void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = nullptr);
338
339  MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
340  /// This is work around solution for findNearestCommonDominator not avaiable
341  /// to post dom a proper fix should go to Dominators.h.
342  MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
343      MachineBasicBlock *MBB2);
344
345private:
346  MBBInfoMap BlockInfoMap;
347  LoopLandInfoMap LLInfoMap;
348  std::map<MachineLoop *, bool> Visited;
349  MachineFunction *FuncRep;
350  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
351};
352
353int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
354  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
355  if (It == BlockInfoMap.end())
356    return INVALIDSCCNUM;
357  return (*It).second->SccNum;
358}
359
360MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
361    const {
362  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
363  if (It == LLInfoMap.end())
364    return nullptr;
365  return (*It).second;
366}
367
368bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
369  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
370  if (!LoopRep)
371    return false;
372  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
373  return MBB->isSuccessor(LoopHeader);
374}
375
376unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) {
377  return LoopRep ? LoopRep->getLoopDepth() : 0;
378}
379
380bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
381  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
382  if (It == BlockInfoMap.end())
383    return false;
384  return (*It).second->IsRetired;
385}
386
387bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
388  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
389  while (LoopRep && LoopRep->getHeader() == MBB) {
390    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
391    if(!LoopLand)
392      return true;
393    if (!isRetiredBlock(LoopLand))
394      return true;
395    LoopRep = LoopRep->getParentLoop();
396  }
397  return false;
398}
399AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
400    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
401    bool AllowSideEntry) const {
402  assert(DstMBB);
403  if (SrcMBB == DstMBB)
404    return SinglePath_InPath;
405  while (SrcMBB && SrcMBB->succ_size() == 1) {
406    SrcMBB = *SrcMBB->succ_begin();
407    if (SrcMBB == DstMBB)
408      return SinglePath_InPath;
409    if (!AllowSideEntry && SrcMBB->pred_size() > 1)
410      return Not_SinglePath;
411  }
412  if (SrcMBB && SrcMBB->succ_size()==0)
413    return SinglePath_NotInPath;
414  return Not_SinglePath;
415}
416
417int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
418    MBBVector::const_iterator E) const {
419  int Count = 0;
420  while (It != E) {
421    if (!isRetiredBlock(*It))
422      ++Count;
423    ++It;
424  }
425  return Count;
426}
427
428bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
429  unsigned BlockSizeThreshold = 30;
430  unsigned CloneInstrThreshold = 100;
431  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
432
433  if(!MultiplePreds)
434    return false;
435  unsigned BlkSize = MBB->size();
436  return ((BlkSize > BlockSizeThreshold) &&
437      (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
438}
439
440void AMDGPUCFGStructurizer::reversePredicateSetter(
441    MachineBasicBlock::iterator I) {
442  while (I--) {
443    if (I->getOpcode() == AMDGPU::PRED_X) {
444      switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
445      case OPCODE_IS_ZERO_INT:
446        static_cast<MachineInstr *>(I)->getOperand(2)
447            .setImm(OPCODE_IS_NOT_ZERO_INT);
448        return;
449      case OPCODE_IS_NOT_ZERO_INT:
450        static_cast<MachineInstr *>(I)->getOperand(2)
451            .setImm(OPCODE_IS_ZERO_INT);
452        return;
453      case OPCODE_IS_ZERO:
454        static_cast<MachineInstr *>(I)->getOperand(2)
455            .setImm(OPCODE_IS_NOT_ZERO);
456        return;
457      case OPCODE_IS_NOT_ZERO:
458        static_cast<MachineInstr *>(I)->getOperand(2)
459            .setImm(OPCODE_IS_ZERO);
460        return;
461      default:
462        llvm_unreachable("PRED_X Opcode invalid!");
463      }
464    }
465  }
466}
467
468void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
469    int NewOpcode, DebugLoc DL) {
470 MachineInstr *MI = MBB->getParent()
471    ->CreateMachineInstr(TII->get(NewOpcode), DL);
472  MBB->push_back(MI);
473  //assume the instruction doesn't take any reg operand ...
474  SHOWNEWINSTR(MI);
475}
476
477MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
478    int NewOpcode, DebugLoc DL) {
479  MachineInstr *MI =
480      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
481  if (MBB->begin() != MBB->end())
482    MBB->insert(MBB->begin(), MI);
483  else
484    MBB->push_back(MI);
485  SHOWNEWINSTR(MI);
486  return MI;
487}
488
489MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
490    MachineBasicBlock::iterator I, int NewOpcode) {
491  MachineInstr *OldMI = &(*I);
492  MachineBasicBlock *MBB = OldMI->getParent();
493  MachineInstr *NewMBB =
494      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
495  MBB->insert(I, NewMBB);
496  //assume the instruction doesn't take any reg operand ...
497  SHOWNEWINSTR(NewMBB);
498  return NewMBB;
499}
500
501void AMDGPUCFGStructurizer::insertCondBranchBefore(
502    MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) {
503  MachineInstr *OldMI = &(*I);
504  MachineBasicBlock *MBB = OldMI->getParent();
505  MachineFunction *MF = MBB->getParent();
506  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
507  MBB->insert(I, NewMI);
508  MachineInstrBuilder MIB(*MF, NewMI);
509  MIB.addReg(OldMI->getOperand(1).getReg(), false);
510  SHOWNEWINSTR(NewMI);
511  //erase later oldInstr->eraseFromParent();
512}
513
514void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk,
515    MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
516    DebugLoc DL) {
517  MachineFunction *MF = blk->getParent();
518  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
519  //insert before
520  blk->insert(I, NewInstr);
521  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
522  SHOWNEWINSTR(NewInstr);
523}
524
525void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB,
526    int NewOpcode, int RegNum) {
527  MachineFunction *MF = MBB->getParent();
528  MachineInstr *NewInstr =
529    MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
530  MBB->push_back(NewInstr);
531  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
532  SHOWNEWINSTR(NewInstr);
533}
534
535int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
536  switch(OldOpcode) {
537  case AMDGPU::JUMP_COND:
538  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
539  case AMDGPU::BRANCH_COND_i32:
540  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
541  default: llvm_unreachable("internal error");
542  }
543  return -1;
544}
545
546int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
547  switch(OldOpcode) {
548  case AMDGPU::JUMP_COND:
549  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
550  case AMDGPU::BRANCH_COND_i32:
551  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
552  default: llvm_unreachable("internal error");
553  }
554  return -1;
555}
556
557int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
558  switch(OldOpcode) {
559  case AMDGPU::JUMP_COND:
560  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
561  default: llvm_unreachable("internal error");
562  };
563  return -1;
564}
565
566int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
567  switch(OldOpcode) {
568  case AMDGPU::JUMP_COND:
569  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
570  default: llvm_unreachable("internal error");
571  }
572  return -1;
573}
574
575MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
576  return MI->getOperand(0).getMBB();
577}
578
579void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
580    MachineBasicBlock *MBB) {
581  MI->getOperand(0).setMBB(MBB);
582}
583
584MachineBasicBlock *
585AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
586    MachineInstr *MI) {
587  assert(MBB->succ_size() == 2);
588  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
589  MachineBasicBlock::succ_iterator It = MBB->succ_begin();
590  MachineBasicBlock::succ_iterator Next = It;
591  ++Next;
592  return (*It == TrueBranch) ? *Next : *It;
593}
594
595bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
596  switch (MI->getOpcode()) {
597    case AMDGPU::JUMP_COND:
598    case AMDGPU::BRANCH_COND_i32:
599    case AMDGPU::BRANCH_COND_f32: return true;
600  default:
601    return false;
602  }
603  return false;
604}
605
606bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
607  switch (MI->getOpcode()) {
608  case AMDGPU::JUMP:
609  case AMDGPU::BRANCH:
610    return true;
611  default:
612    return false;
613  }
614  return false;
615}
616
617DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
618  //get DebugLoc from the first MachineBasicBlock instruction with debug info
619  DebugLoc DL;
620  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
621      ++It) {
622    MachineInstr *instr = &(*It);
623    if (instr->getDebugLoc().isUnknown() == false)
624      DL = instr->getDebugLoc();
625  }
626  return DL;
627}
628
629MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
630    MachineBasicBlock *MBB) {
631  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
632  MachineInstr *MI = &*It;
633  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
634    return MI;
635  return nullptr;
636}
637
638MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
639    MachineBasicBlock *MBB) {
640  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
641      It != E; ++It) {
642    // FIXME: Simplify
643    MachineInstr *MI = &*It;
644    if (MI) {
645      if (isCondBranch(MI) || isUncondBranch(MI))
646        return MI;
647      else if (!TII->isMov(MI->getOpcode()))
648        break;
649    }
650  }
651  return nullptr;
652}
653
654MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
655  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
656  if (It != MBB->rend()) {
657    MachineInstr *instr = &(*It);
658    if (instr->getOpcode() == AMDGPU::RETURN)
659      return instr;
660  }
661  return nullptr;
662}
663
664MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
665  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
666  if (It != MBB->rend()) {
667    MachineInstr *MI = &(*It);
668    if (MI->getOpcode() == AMDGPU::CONTINUE)
669      return MI;
670  }
671  return nullptr;
672}
673
674bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
675  MachineInstr *MI = getReturnInstr(MBB);
676  bool IsReturn = (MBB->succ_size() == 0);
677  if (MI)
678    assert(IsReturn);
679  else if (IsReturn)
680    DEBUG(
681      dbgs() << "BB" << MBB->getNumber()
682             <<" is return block without RETURN instr\n";);
683  return  IsReturn;
684}
685
686void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
687    MachineBasicBlock *SrcMBB) {
688  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
689       iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
690    DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
691}
692
693MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
694  MachineFunction *Func = MBB->getParent();
695  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
696  Func->push_back(NewMBB);  //insert to function
697  for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end();
698      It != E; ++It) {
699    MachineInstr *MI = Func->CloneMachineInstr(It);
700    NewMBB->push_back(MI);
701  }
702  return NewMBB;
703}
704
705void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
706    MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
707    MachineBasicBlock *NewBlk) {
708  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
709  if (BranchMI && isCondBranch(BranchMI) &&
710      getTrueBranch(BranchMI) == OldMBB)
711    setTrueBranch(BranchMI, NewBlk);
712}
713
714void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
715  assert((!MBB->getParent()->getJumpTableInfo()
716          || MBB->getParent()->getJumpTableInfo()->isEmpty())
717         && "found a jump table");
718
719   //collect continue right before endloop
720   SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
721   MachineBasicBlock::iterator Pre = MBB->begin();
722   MachineBasicBlock::iterator E = MBB->end();
723   MachineBasicBlock::iterator It = Pre;
724   while (It != E) {
725     if (Pre->getOpcode() == AMDGPU::CONTINUE
726         && It->getOpcode() == AMDGPU::ENDLOOP)
727       ContInstr.push_back(Pre);
728     Pre = It;
729     ++It;
730   }
731
732   //delete continue right before endloop
733   for (unsigned i = 0; i < ContInstr.size(); ++i)
734      ContInstr[i]->eraseFromParent();
735
736   // TODO to fix up jump table so later phase won't be confused.  if
737   // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
738   // there isn't such an interface yet.  alternatively, replace all the other
739   // blocks in the jump table with the entryBlk //}
740
741}
742
743
744bool AMDGPUCFGStructurizer::prepare() {
745  bool Changed = false;
746
747  //FIXME: if not reducible flow graph, make it so ???
748
749  DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
750
751  orderBlocks(FuncRep);
752
753  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
754
755  // Add an ExitBlk to loop that don't have one
756  for (MachineLoopInfo::iterator It = MLI->begin(),
757       E = MLI->end(); It != E; ++It) {
758    MachineLoop *LoopRep = (*It);
759    MBBVector ExitingMBBs;
760    LoopRep->getExitingBlocks(ExitingMBBs);
761
762    if (ExitingMBBs.size() == 0) {
763      MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
764      if (DummyExitBlk)
765        RetBlks.push_back(DummyExitBlk);
766    }
767  }
768
769  // Remove unconditional branch instr.
770  // Add dummy exit block iff there are multiple returns.
771  for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
772       It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
773    MachineBasicBlock *MBB = *It;
774    removeUnconditionalBranch(MBB);
775    removeRedundantConditionalBranch(MBB);
776    if (isReturnBlock(MBB)) {
777      RetBlks.push_back(MBB);
778    }
779    assert(MBB->succ_size() <= 2);
780  }
781
782  if (RetBlks.size() >= 2) {
783    addDummyExitBlock(RetBlks);
784    Changed = true;
785  }
786
787  return Changed;
788}
789
790bool AMDGPUCFGStructurizer::run() {
791
792  //Assume reducible CFG...
793  DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
794
795#ifdef STRESSTEST
796  //Use the worse block ordering to test the algorithm.
797  ReverseVector(orderedBlks);
798#endif
799
800  DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
801  int NumIter = 0;
802  bool Finish = false;
803  MachineBasicBlock *MBB;
804  bool MakeProgress = false;
805  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
806                                        OrderedBlks.end());
807
808  do {
809    ++NumIter;
810    DEBUG(
811      dbgs() << "numIter = " << NumIter
812             << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
813    );
814
815    SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
816        OrderedBlks.begin();
817    SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
818        OrderedBlks.end();
819
820    SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
821        It;
822    MachineBasicBlock *SccBeginMBB = nullptr;
823    int SccNumBlk = 0;  // The number of active blocks, init to a
824                        // maximum possible number.
825    int SccNumIter;     // Number of iteration in this SCC.
826
827    while (It != E) {
828      MBB = *It;
829
830      if (!SccBeginMBB) {
831        SccBeginIter = It;
832        SccBeginMBB = MBB;
833        SccNumIter = 0;
834        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
835        DEBUG(
836              dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
837              dbgs() << "\n";
838        );
839      }
840
841      if (!isRetiredBlock(MBB))
842        patternMatch(MBB);
843
844      ++It;
845
846      bool ContNextScc = true;
847      if (It == E
848          || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
849        // Just finish one scc.
850        ++SccNumIter;
851        int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
852        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
853          DEBUG(
854            dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
855                   << ", sccNumIter = " << SccNumIter;
856            dbgs() << "doesn't make any progress\n";
857          );
858          ContNextScc = true;
859        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
860          SccNumBlk = sccRemainedNumBlk;
861          It = SccBeginIter;
862          ContNextScc = false;
863          DEBUG(
864            dbgs() << "repeat processing SCC" << getSCCNum(MBB)
865                   << "sccNumIter = " << SccNumIter << '\n';
866          );
867        } else {
868          // Finish the current scc.
869          ContNextScc = true;
870        }
871      } else {
872        // Continue on next component in the current scc.
873        ContNextScc = false;
874      }
875
876      if (ContNextScc)
877        SccBeginMBB = nullptr;
878    } //while, "one iteration" over the function.
879
880    MachineBasicBlock *EntryMBB =
881        GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
882    if (EntryMBB->succ_size() == 0) {
883      Finish = true;
884      DEBUG(
885        dbgs() << "Reduce to one block\n";
886      );
887    } else {
888      int NewnumRemainedBlk
889        = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
890      // consider cloned blocks ??
891      if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
892        MakeProgress = true;
893        NumRemainedBlk = NewnumRemainedBlk;
894      } else {
895        MakeProgress = false;
896        DEBUG(
897          dbgs() << "No progress\n";
898        );
899      }
900    }
901  } while (!Finish && MakeProgress);
902
903  // Misc wrap up to maintain the consistency of the Function representation.
904  wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
905
906  // Detach retired Block, release memory.
907  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
908      It != E; ++It) {
909    if ((*It).second && (*It).second->IsRetired) {
910      assert(((*It).first)->getNumber() != -1);
911      DEBUG(
912        dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
913      );
914      (*It).first->eraseFromParent();  //Remove from the parent Function.
915    }
916    delete (*It).second;
917  }
918  BlockInfoMap.clear();
919  LLInfoMap.clear();
920
921  if (!Finish) {
922    DEBUG(FuncRep->viewCFG());
923    llvm_unreachable("IRREDUCIBLE_CFG");
924  }
925
926  return true;
927}
928
929
930
931void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
932  int SccNum = 0;
933  MachineBasicBlock *MBB;
934  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
935       ++It, ++SccNum) {
936    const std::vector<MachineBasicBlock *> &SccNext = *It;
937    for (std::vector<MachineBasicBlock *>::const_iterator
938         blockIter = SccNext.begin(), blockEnd = SccNext.end();
939         blockIter != blockEnd; ++blockIter) {
940      MBB = *blockIter;
941      OrderedBlks.push_back(MBB);
942      recordSccnum(MBB, SccNum);
943    }
944  }
945
946  //walk through all the block in func to check for unreachable
947  typedef GraphTraits<MachineFunction *> GTM;
948  MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
949  for (; It != E; ++It) {
950    MachineBasicBlock *MBB = &(*It);
951    SccNum = getSCCNum(MBB);
952    if (SccNum == INVALIDSCCNUM)
953      dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
954  }
955}
956
957int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
958  int NumMatch = 0;
959  int CurMatch;
960
961  DEBUG(
962        dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
963  );
964
965  while ((CurMatch = patternMatchGroup(MBB)) > 0)
966    NumMatch += CurMatch;
967
968  DEBUG(
969        dbgs() << "End patternMatch BB" << MBB->getNumber()
970      << ", numMatch = " << NumMatch << "\n";
971  );
972
973  return NumMatch;
974}
975
976int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
977  int NumMatch = 0;
978  NumMatch += loopendPatternMatch();
979  NumMatch += serialPatternMatch(MBB);
980  NumMatch += ifPatternMatch(MBB);
981  return NumMatch;
982}
983
984
985int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
986  if (MBB->succ_size() != 1)
987    return 0;
988
989  MachineBasicBlock *childBlk = *MBB->succ_begin();
990  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
991    return 0;
992
993  mergeSerialBlock(MBB, childBlk);
994  ++numSerialPatternMatch;
995  return 1;
996}
997
998int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
999  //two edges
1000  if (MBB->succ_size() != 2)
1001    return 0;
1002  if (hasBackEdge(MBB))
1003    return 0;
1004  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1005  if (!BranchMI)
1006    return 0;
1007
1008  assert(isCondBranch(BranchMI));
1009  int NumMatch = 0;
1010
1011  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
1012  NumMatch += serialPatternMatch(TrueMBB);
1013  NumMatch += ifPatternMatch(TrueMBB);
1014  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
1015  NumMatch += serialPatternMatch(FalseMBB);
1016  NumMatch += ifPatternMatch(FalseMBB);
1017  MachineBasicBlock *LandBlk;
1018  int Cloned = 0;
1019
1020  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
1021  // TODO: Simplify
1022  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
1023    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
1024    // Diamond pattern
1025    LandBlk = *TrueMBB->succ_begin();
1026  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
1027    // Triangle pattern, false is empty
1028    LandBlk = FalseMBB;
1029    FalseMBB = nullptr;
1030  } else if (FalseMBB->succ_size() == 1
1031             && *FalseMBB->succ_begin() == TrueMBB) {
1032    // Triangle pattern, true is empty
1033    // We reverse the predicate to make a triangle, empty false pattern;
1034    std::swap(TrueMBB, FalseMBB);
1035    reversePredicateSetter(MBB->end());
1036    LandBlk = FalseMBB;
1037    FalseMBB = nullptr;
1038  } else if (FalseMBB->succ_size() == 1
1039             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
1040    LandBlk = *FalseMBB->succ_begin();
1041  } else if (TrueMBB->succ_size() == 1
1042    && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
1043    LandBlk = *TrueMBB->succ_begin();
1044  } else {
1045    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
1046  }
1047
1048  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
1049  // new BB created for landBlk==NULL may introduce new challenge to the
1050  // reduction process.
1051  if (LandBlk &&
1052      ((TrueMBB && TrueMBB->pred_size() > 1)
1053      || (FalseMBB && FalseMBB->pred_size() > 1))) {
1054     Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
1055  }
1056
1057  if (TrueMBB && TrueMBB->pred_size() > 1) {
1058    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
1059    ++Cloned;
1060  }
1061
1062  if (FalseMBB && FalseMBB->pred_size() > 1) {
1063    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1064    ++Cloned;
1065  }
1066
1067  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1068
1069  ++numIfPatternMatch;
1070
1071  numClonedBlock += Cloned;
1072
1073  return 1 + Cloned + NumMatch;
1074}
1075
1076int AMDGPUCFGStructurizer::loopendPatternMatch() {
1077  std::vector<MachineLoop *> NestedLoops;
1078  for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); It != E;
1079       ++It)
1080    for (MachineLoop *ML : depth_first(*It))
1081      NestedLoops.push_back(ML);
1082
1083  if (NestedLoops.size() == 0)
1084    return 0;
1085
1086  // Process nested loop outside->inside, so "continue" to a outside loop won't
1087  // be mistaken as "break" of the current loop.
1088  int Num = 0;
1089  for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
1090      E = NestedLoops.rend(); It != E; ++It) {
1091    MachineLoop *ExaminedLoop = *It;
1092    if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1093      continue;
1094    DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1095    int NumBreak = mergeLoop(ExaminedLoop);
1096    if (NumBreak == -1)
1097      break;
1098    Num += NumBreak;
1099  }
1100  return Num;
1101}
1102
1103int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1104  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1105  MBBVector ExitingMBBs;
1106  LoopRep->getExitingBlocks(ExitingMBBs);
1107  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1108  DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
1109  // We assume a single ExitBlk
1110  MBBVector ExitBlks;
1111  LoopRep->getExitBlocks(ExitBlks);
1112  SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
1113  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1114    ExitBlkSet.insert(ExitBlks[i]);
1115  assert(ExitBlkSet.size() == 1);
1116  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1117  assert(ExitBlk && "Loop has several exit block");
1118  MBBVector LatchBlks;
1119  typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
1120  InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
1121      PE = InvMBBTraits::child_end(LoopHeader);
1122  for (; PI != PE; PI++) {
1123    if (LoopRep->contains(*PI))
1124      LatchBlks.push_back(*PI);
1125  }
1126
1127  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1128    mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1129  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1130    settleLoopcontBlock(LatchBlks[i], LoopHeader);
1131  int Match = 0;
1132  do {
1133    Match = 0;
1134    Match += serialPatternMatch(LoopHeader);
1135    Match += ifPatternMatch(LoopHeader);
1136  } while (Match > 0);
1137  mergeLooplandBlock(LoopHeader, ExitBlk);
1138  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1139  if (ParentLoop)
1140    MLI->changeLoopFor(LoopHeader, ParentLoop);
1141  else
1142    MLI->removeBlock(LoopHeader);
1143  Visited[LoopRep] = true;
1144  return 1;
1145}
1146
1147int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
1148    MachineBasicBlock *LoopHeader) {
1149  int NumCont = 0;
1150  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> ContMBB;
1151  typedef GraphTraits<Inverse<MachineBasicBlock *> > GTIM;
1152  GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
1153      E = GTIM::child_end(LoopHeader);
1154  for (; It != E; ++It) {
1155    MachineBasicBlock *MBB = *It;
1156    if (LoopRep->contains(MBB)) {
1157      handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
1158                          LoopHeader, LoopRep);
1159      ContMBB.push_back(MBB);
1160      ++NumCont;
1161    }
1162  }
1163
1164  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(),
1165      E = ContMBB.end(); It != E; ++It) {
1166    (*It)->removeSuccessor(LoopHeader);
1167  }
1168
1169  numLoopcontPatternMatch += NumCont;
1170
1171  return NumCont;
1172}
1173
1174
1175bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1176    MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1177  if (Src1MBB->succ_size() == 0) {
1178    MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1179    if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1180      MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1181      if (TheEntry) {
1182        DEBUG(
1183          dbgs() << "isLoopContBreakBlock yes src1 = BB"
1184                 << Src1MBB->getNumber()
1185                 << " src2 = BB" << Src2MBB->getNumber() << "\n";
1186        );
1187        return true;
1188      }
1189    }
1190  }
1191  return false;
1192}
1193
1194int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1195    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1196  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1197  if (Num == 0) {
1198    DEBUG(
1199      dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1200    );
1201    Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1202  }
1203  return Num;
1204}
1205
1206int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
1207    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1208  int Num = 0;
1209  MachineBasicBlock *DownBlk;
1210
1211  //trueBlk could be the common post dominator
1212  DownBlk = TrueMBB;
1213
1214  DEBUG(
1215    dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
1216           << " true = BB" << TrueMBB->getNumber()
1217           << ", numSucc=" << TrueMBB->succ_size()
1218           << " false = BB" << FalseMBB->getNumber() << "\n";
1219  );
1220
1221  while (DownBlk) {
1222    DEBUG(
1223      dbgs() << "check down = BB" << DownBlk->getNumber();
1224    );
1225
1226    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1227      DEBUG(
1228        dbgs() << " working\n";
1229      );
1230
1231      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1232      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1233
1234      numClonedBlock += Num;
1235      Num += serialPatternMatch(*HeadMBB->succ_begin());
1236      Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
1237      Num += ifPatternMatch(HeadMBB);
1238      assert(Num > 0);
1239
1240      break;
1241    }
1242    DEBUG(
1243      dbgs() << " not working\n";
1244    );
1245    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
1246  } // walk down the postDomTree
1247
1248  return Num;
1249}
1250
1251void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1252    MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1253    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1254  dbgs() << "head = BB" << HeadMBB->getNumber()
1255         << " size = " << HeadMBB->size();
1256  if (Detail) {
1257    dbgs() << "\n";
1258    HeadMBB->print(dbgs());
1259    dbgs() << "\n";
1260  }
1261
1262  if (TrueMBB) {
1263    dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1264           << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1265    if (Detail) {
1266      dbgs() << "\n";
1267      TrueMBB->print(dbgs());
1268      dbgs() << "\n";
1269    }
1270  }
1271  if (FalseMBB) {
1272    dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1273           << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1274    if (Detail) {
1275      dbgs() << "\n";
1276      FalseMBB->print(dbgs());
1277      dbgs() << "\n";
1278    }
1279  }
1280  if (LandMBB) {
1281    dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1282           << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1283    if (Detail) {
1284      dbgs() << "\n";
1285      LandMBB->print(dbgs());
1286      dbgs() << "\n";
1287    }
1288  }
1289
1290    dbgs() << "\n";
1291}
1292
1293int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
1294    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
1295    MachineBasicBlock **LandMBBPtr) {
1296  bool MigrateTrue = false;
1297  bool MigrateFalse = false;
1298
1299  MachineBasicBlock *LandBlk = *LandMBBPtr;
1300
1301  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
1302         && (!FalseMBB || FalseMBB->succ_size() <= 1));
1303
1304  if (TrueMBB == FalseMBB)
1305    return 0;
1306
1307  MigrateTrue = needMigrateBlock(TrueMBB);
1308  MigrateFalse = needMigrateBlock(FalseMBB);
1309
1310  if (!MigrateTrue && !MigrateFalse)
1311    return 0;
1312
1313  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1314  // have more than one predecessors.  without doing this, its predecessor
1315  // rather than headBlk will have undefined value in initReg.
1316  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
1317    MigrateTrue = true;
1318  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
1319    MigrateFalse = true;
1320
1321  DEBUG(
1322    dbgs() << "before improveSimpleJumpintoIf: ";
1323    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1324  );
1325
1326  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1327  //
1328  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1329  //      {initReg = 0; org falseBlk branch }
1330  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1331  //      => org landBlk
1332  //      if landBlk->pred_size() > 2, put the about if-else inside
1333  //      if (initReg !=2) {...}
1334  //
1335  // add initReg = initVal to headBlk
1336
1337  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1338  if (!MigrateTrue || !MigrateFalse) {
1339    // XXX: We have an opportunity here to optimize the "branch into if" case
1340    // here.  Branch into if looks like this:
1341    //                        entry
1342    //                       /     |
1343    //           diamond_head       branch_from
1344    //             /      \           |
1345    // diamond_false        diamond_true
1346    //             \      /
1347    //               done
1348    //
1349    // The diamond_head block begins the "if" and the diamond_true block
1350    // is the block being "branched into".
1351    //
1352    // If MigrateTrue is true, then TrueBB is the block being "branched into"
1353    // and if MigrateFalse is true, then FalseBB is the block being
1354    // "branched into"
1355    //
1356    // Here is the pseudo code for how I think the optimization should work:
1357    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
1358    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
1359    // 3. Move the branch instruction from diamond_head into its own basic
1360    //    block (new_block).
1361    // 4. Add an unconditional branch from diamond_head to new_block
1362    // 5. Replace the branch instruction in branch_from with an unconditional
1363    //    branch to new_block.  If branch_from has multiple predecessors, then
1364    //    we need to replace the True/False block in the branch
1365    //    instruction instead of replacing it.
1366    // 6. Change the condition of the branch instruction in new_block from
1367    //    COND to (COND || GPR0)
1368    //
1369    // In order insert these MOV instruction, we will need to use the
1370    // RegisterScavenger.  Usually liveness stops being tracked during
1371    // the late machine optimization passes, however if we implement
1372    // bool TargetRegisterInfo::requiresRegisterScavenging(
1373    //                                                const MachineFunction &MF)
1374    // and have it return true, liveness will be tracked correctly
1375    // by generic optimization passes.  We will also need to make sure that
1376    // all of our target-specific passes that run after regalloc and before
1377    // the CFGStructurizer track liveness and we will need to modify this pass
1378    // to correctly track liveness.
1379    //
1380    // After the above changes, the new CFG should look like this:
1381    //                        entry
1382    //                       /     |
1383    //           diamond_head       branch_from
1384    //                       \     /
1385    //                      new_block
1386    //                      /      |
1387    //         diamond_false        diamond_true
1388    //                      \      /
1389    //                        done
1390    //
1391    // Without this optimization, we are forced to duplicate the diamond_true
1392    // block and we will end up with a CFG like this:
1393    //
1394    //                        entry
1395    //                       /     |
1396    //           diamond_head       branch_from
1397    //             /      \                   |
1398    // diamond_false        diamond_true      diamond_true (duplicate)
1399    //             \      /                   |
1400    //               done --------------------|
1401    //
1402    // Duplicating diamond_true can be very costly especially if it has a
1403    // lot of instructions.
1404    return 0;
1405  }
1406
1407  int NumNewBlk = 0;
1408
1409  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
1410
1411  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1412  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
1413
1414  if (LandBlkHasOtherPred) {
1415    llvm_unreachable("Extra register needed to handle CFG");
1416    unsigned CmpResReg =
1417      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1418    llvm_unreachable("Extra compare instruction needed to handle CFG");
1419    insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
1420        CmpResReg, DebugLoc());
1421  }
1422
1423  // XXX: We are running this after RA, so creating virtual registers will
1424  // cause an assertion failure in the PostRA scheduling pass.
1425  unsigned InitReg =
1426    HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1427  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
1428      DebugLoc());
1429
1430  if (MigrateTrue) {
1431    migrateInstruction(TrueMBB, LandBlk, I);
1432    // need to uncondionally insert the assignment to ensure a path from its
1433    // predecessor rather than headBlk has valid value in initReg if
1434    // (initVal != 1).
1435    llvm_unreachable("Extra register needed to handle CFG");
1436  }
1437  insertInstrBefore(I, AMDGPU::ELSE);
1438
1439  if (MigrateFalse) {
1440    migrateInstruction(FalseMBB, LandBlk, I);
1441    // need to uncondionally insert the assignment to ensure a path from its
1442    // predecessor rather than headBlk has valid value in initReg if
1443    // (initVal != 0)
1444    llvm_unreachable("Extra register needed to handle CFG");
1445  }
1446
1447  if (LandBlkHasOtherPred) {
1448    // add endif
1449    insertInstrBefore(I, AMDGPU::ENDIF);
1450
1451    // put initReg = 2 to other predecessors of landBlk
1452    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
1453         PE = LandBlk->pred_end(); PI != PE; ++PI) {
1454      MachineBasicBlock *MBB = *PI;
1455      if (MBB != TrueMBB && MBB != FalseMBB)
1456        llvm_unreachable("Extra register needed to handle CFG");
1457    }
1458  }
1459  DEBUG(
1460    dbgs() << "result from improveSimpleJumpintoIf: ";
1461    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1462  );
1463
1464  // update landBlk
1465  *LandMBBPtr = LandBlk;
1466
1467  return NumNewBlk;
1468}
1469
1470void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
1471    MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
1472    MachineLoop *ContLoop) {
1473  DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber()
1474               << " header = BB" << ContMBB->getNumber() << "\n";
1475        dbgs() << "Trying to continue loop-depth = "
1476               << getLoopDepth(ContLoop)
1477               << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";);
1478  settleLoopcontBlock(ContingMBB, ContMBB);
1479}
1480
1481void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
1482    MachineBasicBlock *SrcMBB) {
1483  DEBUG(
1484    dbgs() << "serialPattern BB" << DstMBB->getNumber()
1485           << " <= BB" << SrcMBB->getNumber() << "\n";
1486  );
1487  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
1488
1489  DstMBB->removeSuccessor(SrcMBB);
1490  cloneSuccessorList(DstMBB, SrcMBB);
1491
1492  removeSuccessor(SrcMBB);
1493  MLI->removeBlock(SrcMBB);
1494  retireBlock(SrcMBB);
1495}
1496
1497void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
1498    MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
1499    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
1500  assert (TrueMBB);
1501  DEBUG(
1502    dbgs() << "ifPattern BB" << MBB->getNumber();
1503    dbgs() << "{  ";
1504    if (TrueMBB) {
1505      dbgs() << "BB" << TrueMBB->getNumber();
1506    }
1507    dbgs() << "  } else ";
1508    dbgs() << "{  ";
1509    if (FalseMBB) {
1510      dbgs() << "BB" << FalseMBB->getNumber();
1511    }
1512    dbgs() << "  }\n ";
1513    dbgs() << "landBlock: ";
1514    if (!LandMBB) {
1515      dbgs() << "NULL";
1516    } else {
1517      dbgs() << "BB" << LandMBB->getNumber();
1518    }
1519    dbgs() << "\n";
1520  );
1521
1522  int OldOpcode = BranchMI->getOpcode();
1523  DebugLoc BranchDL = BranchMI->getDebugLoc();
1524
1525//    transform to
1526//    if cond
1527//       trueBlk
1528//    else
1529//       falseBlk
1530//    endif
1531//    landBlk
1532
1533  MachineBasicBlock::iterator I = BranchMI;
1534  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1535      BranchDL);
1536
1537  if (TrueMBB) {
1538    MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
1539    MBB->removeSuccessor(TrueMBB);
1540    if (LandMBB && TrueMBB->succ_size()!=0)
1541      TrueMBB->removeSuccessor(LandMBB);
1542    retireBlock(TrueMBB);
1543    MLI->removeBlock(TrueMBB);
1544  }
1545
1546  if (FalseMBB) {
1547    insertInstrBefore(I, AMDGPU::ELSE);
1548    MBB->splice(I, FalseMBB, FalseMBB->begin(),
1549                   FalseMBB->end());
1550    MBB->removeSuccessor(FalseMBB);
1551    if (LandMBB && FalseMBB->succ_size() != 0)
1552      FalseMBB->removeSuccessor(LandMBB);
1553    retireBlock(FalseMBB);
1554    MLI->removeBlock(FalseMBB);
1555  }
1556  insertInstrBefore(I, AMDGPU::ENDIF);
1557
1558  BranchMI->eraseFromParent();
1559
1560  if (LandMBB && TrueMBB && FalseMBB)
1561    MBB->addSuccessor(LandMBB);
1562
1563}
1564
void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
    MachineBasicBlock *LandMBB) {
  DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
               << " land = BB" << LandMBB->getNumber() << "\n";);

  insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
  insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
  DstBlk->addSuccessor(LandMBB);
  DstBlk->removeSuccessor(DstBlk);
}

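/// Merge a loop-break pattern: the conditional branch in ExitingMBB that
/// leaves the loop for LandMBB is rewritten into a structured
/// IF_PREDICATE_SET / BREAK / ENDIF sequence, and the exit edge is removed.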
void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
    MachineBasicBlock *LandMBB) {
  DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
               << " land = BB" << LandMBB->getNumber() << "\n";);
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
  assert(BranchMI && isCondBranch(BranchMI));
  DebugLoc DL = BranchMI->getDebugLoc();
  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
  MachineBasicBlock::iterator I = BranchMI;
  if (TrueBranch != LandMBB)
    reversePredicateSetter(I);
  insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET,
                         AMDGPU::PREDICATE_BIT, DL);
  insertInstrBefore(I, AMDGPU::BREAK);
  insertInstrBefore(I, AMDGPU::ENDIF);
  // Now BranchMI can be erased safely.
  BranchMI->eraseFromParent();
  // Take care of successors and retire blocks.
  ExitingMBB->removeSuccessor(LandMBB);
}

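/// Rewrite the conditional branch in ContingMBB that continues the loop
/// headed by ContMBB.  If the branch is not the last instruction of the
/// block, it becomes a structured IF / CONTINUE / ENDIF sequence; otherwise
/// it is folded into a single continue-on-condition opcode.  If the branch
/// has already been erased, a plain CONTINUE is appended.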
void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
    MachineBasicBlock *ContMBB) {
  DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
               << ContingMBB->getNumber()
               << ", cont = BB" << ContMBB->getNumber() << "\n";);

  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
  if (MI) {
    assert(isCondBranch(MI));
    MachineBasicBlock::iterator I = MI;
    MachineBasicBlock *TrueBranch = getTrueBranch(MI);
    int OldOpcode = MI->getOpcode();
    DebugLoc DL = MI->getDebugLoc();

    bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);

    if (!UseContinueLogical) {
      int BranchOpcode =
          TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
          getBranchZeroOpcode(OldOpcode);
      insertCondBranchBefore(I, BranchOpcode, DL);
      // Use insertEnd to ensure phi-moves, if they exist, go before the
      // continue-instr.
      insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
      insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
    } else {
      int BranchOpcode =
          TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
          getContinueZeroOpcode(OldOpcode);
      insertCondBranchBefore(I, BranchOpcode, DL);
    }

    MI->eraseFromParent();
  } else {
    // If we've arrived here, the branch instruction has already been erased.
    // Travel back up the basic block for the last debug location; we have
    // just inserted that reference here, so it should be representative.
    // Use insertEnd to ensure phi-moves, if they exist, go before the
    // continue-instr.
    insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
        getLastDebugLocInBB(ContingMBB));
  }
}

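/// Starting from the edge PreMBB -> SrcMBB, walk the single-successor chain
/// toward DstMBB and clone every block on the chain that has more than one
/// predecessor, so that PreMBB ends up with a private copy of the path.
/// Returns the number of blocks cloned.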
int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
  int Cloned = 0;
  assert(PreMBB->isSuccessor(SrcMBB));
  while (SrcMBB && SrcMBB != DstMBB) {
    assert(SrcMBB->succ_size() == 1);
    if (SrcMBB->pred_size() > 1) {
      SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
      ++Cloned;
    }

    PreMBB = SrcMBB;
    SrcMBB = *SrcMBB->succ_begin();
  }

  return Cloned;
}

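/// Clone MBB for predecessor PredMBB: redirect PredMBB's branch to the clone
/// and give the clone all of MBB's successors.  Returns the cloned block.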
MachineBasicBlock *
AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
    MachineBasicBlock *PredMBB) {
  assert(PredMBB->isSuccessor(MBB) &&
         "PredMBB is not a predecessor of MBB");

  MachineBasicBlock *CloneMBB = clone(MBB);  // Clone instructions.
  // Replace PredMBB's branch target MBB with CloneMBB.
  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);

  PredMBB->removeSuccessor(MBB);
  PredMBB->addSuccessor(CloneMBB);

  // Add all of MBB's successors to CloneMBB.
  cloneSuccessorList(CloneMBB, MBB);

  numClonedInstr += MBB->size();

  DEBUG(
    dbgs() << "Cloned block: " << "BB"
           << MBB->getNumber() << "size " << MBB->size() << "\n";
  );

  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");

  return CloneMBB;
}

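/// Splice all instructions of SrcMBB that precede its original (non-AMDGPU)
/// branch instruction into DstMBB, right before the iterator I.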
void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
    MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator SpliceEnd;
  // Look for the input branch instruction, not the AMDGPU branch instruction.
  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
  if (!BranchMI) {
    DEBUG(
      dbgs() << "migrateInstruction doesn't see branch instr\n";
    );
    SpliceEnd = SrcMBB->end();
  } else {
    DEBUG(
      dbgs() << "migrateInstruction sees branch instr\n";
      BranchMI->dump();
    );
    SpliceEnd = BranchMI;
  }
  DEBUG(
    dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
           << " srcSize = " << SrcMBB->size() << "\n";
  );

  // Splice inserts before the insertion position I.
  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);

  DEBUG(
    dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
           << " srcSize = " << SrcMBB->size() << "\n";
  );
}

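/// If LoopRep is an infinite loop (its latch ends in an unconditional branch
/// back to the header), create a dummy exit block and rewrite the latch
/// branch into a conditional branch targeting it.  Returns nullptr if the
/// loop already has an exit.  Note: the rewrite path currently hits
/// llvm_unreachable because an extra register would be needed to handle the
/// CFG.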
MachineBasicBlock *
AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop *LoopRep) {
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
  const TargetRegisterClass *I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);

  if (!LoopHeader || !LoopLatch)
    return nullptr;
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
  // Is LoopRep an infinite loop?
  if (!BranchMI || !isUncondBranch(BranchMI))
    return nullptr;

  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
  FuncRep->push_back(DummyExitBlk);  // Insert to function.
  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
  DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
  MachineBasicBlock::iterator I = BranchMI;
  unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC);
  llvm_unreachable("Extra register needed to handle CFG");
  // Note: the code below is currently unreachable.
  MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
  MachineInstrBuilder MIB(*FuncRep, NewMI);
  MIB.addMBB(LoopHeader);
  MIB.addReg(ImmReg, false);
  SHOWNEWINSTR(NewMI);
  BranchMI->eraseFromParent();
  LoopLatch->addSuccessor(DummyExitBlk);

  return DummyExitBlk;
}

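/// Erase any trailing unconditional branches from MBB.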
void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
  MachineInstr *BranchMI;

  // A basic block can end in two unconditional branches (seen in
  // test_fc_do_while_or.c); the upstream pass should be fixed so that this
  // loop becomes unnecessary.
  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
          && isUncondBranch(BranchMI)) {
    DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
    BranchMI->eraseFromParent();
  }
}

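/// If both successor edges of MBB target the same block, the conditional
/// branch at the end of MBB is redundant: erase it and drop the duplicate
/// successor edge.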
void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
    MachineBasicBlock *MBB) {
  if (MBB->succ_size() != 2)
    return;
  MachineBasicBlock *MBB1 = *MBB->succ_begin();
  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
  if (MBB1 != MBB2)
    return;

  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
  assert(BranchMI && isCondBranch(BranchMI));
  DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
  BranchMI->eraseFromParent();
  SHOWNEWBLK(MBB1, "Removing redundant successor");
  MBB->removeSuccessor(MBB1);
}

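/// Create a single dummy exit block that ends in RETURN, then redirect every
/// block in RetMBB to it, erasing their original return instructions.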
void AMDGPUCFGStructurizer::addDummyExitBlock(
    SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
  FuncRep->push_back(DummyExitBlk);  // Insert to function.
  insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);

  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
       E = RetMBB.end(); It != E; ++It) {
    MachineBasicBlock *MBB = *It;
    MachineInstr *MI = getReturnInstr(MBB);
    if (MI)
      MI->eraseFromParent();
    MBB->addSuccessor(DummyExitBlk);
    DEBUG(
      dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
             << " successors\n";
    );
  }
  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
}

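/// Remove all successor edges of MBB.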
void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
  while (MBB->succ_size())
    MBB->removeSuccessor(*MBB->succ_begin());
}

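/// Record the SCC number computed for MBB, creating its BlockInformation
/// entry if needed.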
void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
    int SccNum) {
  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
  if (!SrcBlkInfo)
    SrcBlkInfo = new BlockInformation();
  SrcBlkInfo->SccNum = SccNum;
}

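/// Mark MBB as retired in its BlockInformation entry.  The block must have no
/// remaining predecessors or successors.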
void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
  DEBUG(
    dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
  );

  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];

  if (!SrcBlkInfo)
    SrcBlkInfo = new BlockInformation();

  SrcBlkInfo->IsRetired = true;
  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
         && "can't retire block yet");
}

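/// Record MBB as the landing block of loopRep.  If MBB is null, create a
/// dummy landing block so that a loop without a break still has somewhere to
/// land.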
void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep,
    MachineBasicBlock *MBB) {
  MachineBasicBlock *&TheEntry = LLInfoMap[loopRep];
  if (!MBB) {
    MBB = FuncRep->CreateMachineBasicBlock();
    FuncRep->push_back(MBB);  // Insert to function.
    SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: ");
  }
  TheEntry = MBB;
  DEBUG(
    dbgs() << "setLoopLandBlock loop-header = BB"
           << loopRep->getHeader()->getNumber()
           << "  landing-block = BB" << MBB->getNumber() << "\n";
  );
}

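/// Return the nearest common post-dominator of MBB1 and MBB2 by walking up
/// the post-dominator tree.  Newly cloned blocks that are not yet in the tree
/// are handled by following their single successor.  Returns nullptr if no
/// common post-dominator is found.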
MachineBasicBlock *
AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
    MachineBasicBlock *MBB2) {
  if (PDT->dominates(MBB1, MBB2))
    return MBB1;
  if (PDT->dominates(MBB2, MBB1))
    return MBB2;

  MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
  MachineDomTreeNode *Node2 = PDT->getNode(MBB2);

  // Handle newly cloned node.
  if (!Node1 && MBB1->succ_size() == 1)
    return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
  if (!Node2 && MBB2->succ_size() == 1)
    return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());

  if (!Node1 || !Node2)
    return nullptr;

  Node1 = Node1->getIDom();
  while (Node1) {
    if (PDT->dominates(Node1, Node2))
      return Node1->getBlock();
    Node1 = Node1->getIDom();
  }

  return nullptr;
}

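/// Return the nearest common post-dominator of all blocks in MBBs, or nullptr
/// if they have none.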
MachineBasicBlock *
AMDGPUCFGStructurizer::findNearestCommonPostDom(
    std::set<MachineBasicBlock *> &MBBs) {
  MachineBasicBlock *CommonDom;
  std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
  std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
  for (CommonDom = *It; It != E && CommonDom; ++It) {
    MachineBasicBlock *MBB = *It;
    if (MBB != CommonDom)
      CommonDom = findNearestCommonPostDom(MBB, CommonDom);
  }

  DEBUG(
    dbgs() << "Common post dominator for exit blocks is ";
    if (CommonDom)
      dbgs() << "BB" << CommonDom->getNumber() << "\n";
    else
      dbgs() << "NULL\n";
  );

  return CommonDom;
}

char AMDGPUCFGStructurizer::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
                      "AMDGPU CFG Structurizer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
                    "AMDGPU CFG Structurizer", false, false)

FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
  return new AMDGPUCFGStructurizer();
}