AMDILCFGStructurizer.cpp revision 0eca5fd919b0a31ea926b5f5072e5e56f7a55269
1//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9
10#define DEBUGME 0
11#define DEBUG_TYPE "structcfg"
12
13#include "AMDGPUInstrInfo.h"
14#include "AMDIL.h"
15#include "AMDILUtilityFunctions.h"
16#include "llvm/ADT/SCCIterator.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/Analysis/DominatorInternals.h"
20#include "llvm/Analysis/Dominators.h"
21#include "llvm/CodeGen/MachineDominators.h"
22#include "llvm/CodeGen/MachineDominators.h"
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineFunctionAnalysis.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineJumpTableInfo.h"
29#include "llvm/CodeGen/MachineLoopInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/Target/TargetInstrInfo.h"
32
33#define FirstNonDebugInstr(A) A->begin()
34using namespace llvm;
35
36// TODO: move-begin.
37
38//===----------------------------------------------------------------------===//
39//
40// Statistics for CFGStructurizer.
41//
42//===----------------------------------------------------------------------===//
43
44STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
45    "matched");
46STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
47    "matched");
48STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
49    "pattern matched");
50STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
51    "pattern matched");
52STATISTIC(numLoopPatternMatch,      "CFGStructurizer number of loop pattern "
53    "matched");
54STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
55STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
56
57//===----------------------------------------------------------------------===//
58//
59// Miscellaneous utility for CFGStructurizer.
60//
61//===----------------------------------------------------------------------===//
62namespace llvmCFGStruct
63{
64#define SHOWNEWINSTR(i) \
65  if (DEBUGME) errs() << "New instr: " << *i << "\n"
66
67#define SHOWNEWBLK(b, msg) \
68if (DEBUGME) { \
69  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
70  errs() << "\n"; \
71}
72
73#define SHOWBLK_DETAIL(b, msg) \
74if (DEBUGME) { \
75  if (b) { \
76  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
77  b->print(errs()); \
78  errs() << "\n"; \
79  } \
80}
81
82#define INVALIDSCCNUM -1
83#define INVALIDREGNUM 0
84
85template<class LoopinfoT>
86void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
87  for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
88       iterEnd = LoopInfo.end();
89       iter != iterEnd; ++iter) {
90    (*iter)->print(OS, 0);
91  }
92}
93
94template<class NodeT>
95void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
96  size_t sz = Src.size();
97  for (size_t i = 0; i < sz/2; ++i) {
98    NodeT *t = Src[i];
99    Src[i] = Src[sz - i - 1];
100    Src[sz - i - 1] = t;
101  }
102}
103
104} //end namespace llvmCFGStruct
105
106
107//===----------------------------------------------------------------------===//
108//
109// MachinePostDominatorTree
110//
111//===----------------------------------------------------------------------===//
112
113namespace llvm {
114
115/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
116/// to compute the a post-dominator tree.
117///
118struct MachinePostDominatorTree : public MachineFunctionPass {
119  static char ID; // Pass identification, replacement for typeid
120  DominatorTreeBase<MachineBasicBlock> *DT;
121  MachinePostDominatorTree() : MachineFunctionPass(ID)
122  {
123    DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
124    // postdominator
125  }
126
127  ~MachinePostDominatorTree();
128
129  virtual bool runOnMachineFunction(MachineFunction &MF);
130
131  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
132    AU.setPreservesAll();
133    MachineFunctionPass::getAnalysisUsage(AU);
134  }
135
136  inline const std::vector<MachineBasicBlock *> &getRoots() const {
137    return DT->getRoots();
138  }
139
140  inline MachineDomTreeNode *getRootNode() const {
141    return DT->getRootNode();
142  }
143
144  inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
145    return DT->getNode(BB);
146  }
147
148  inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
149    return DT->getNode(BB);
150  }
151
152  inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
153    return DT->dominates(A, B);
154  }
155
156  inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
157    return DT->dominates(A, B);
158  }
159
160  inline bool
161  properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
162    return DT->properlyDominates(A, B);
163  }
164
165  inline bool
166  properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
167    return DT->properlyDominates(A, B);
168  }
169
170  inline MachineBasicBlock *
171  findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
172    return DT->findNearestCommonDominator(A, B);
173  }
174
175  virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
176    DT->print(OS);
177  }
178};
179} //end of namespace llvm
180
181char MachinePostDominatorTree::ID = 0;
182static RegisterPass<MachinePostDominatorTree>
183machinePostDominatorTreePass("machinepostdomtree",
184                             "MachinePostDominator Tree Construction",
185                             true, true);
186
187//const PassInfo *const llvm::MachinePostDominatorsID
188//= &machinePostDominatorTreePass;
189
190bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
191  DT->recalculate(F);
192  //DEBUG(DT->dump());
193  return false;
194}
195
196MachinePostDominatorTree::~MachinePostDominatorTree() {
197  delete DT;
198}
199
200//===----------------------------------------------------------------------===//
201//
202// supporting data structure for CFGStructurizer
203//
204//===----------------------------------------------------------------------===//
205
206namespace llvmCFGStruct
207{
208template<class PassT>
209struct CFGStructTraits {
210};
211
212template <class InstrT>
213class BlockInformation {
214public:
215  bool isRetired;
216  int  sccNum;
217  //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
218  //Instructions defining the corresponding successor.
219  BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
220};
221
222template <class BlockT, class InstrT, class RegiT>
223class LandInformation {
224public:
225  BlockT *landBlk;
226  std::set<RegiT> breakInitRegs;  //Registers that need to "reg = 0", before
227                                  //WHILELOOP(thisloop) init before entering
228                                  //thisloop.
229  std::set<RegiT> contInitRegs;   //Registers that need to "reg = 0", after
230                                  //WHILELOOP(thisloop) init after entering
231                                  //thisloop.
232  std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
233                                     //land block, branch cond on this reg.
234  std::set<RegiT> breakOnRegs;       //registers that need to "if (reg) break
235                                     //endif" after ENDLOOP(thisloop) break
236                                     //outerLoopOf(thisLoop).
237  std::set<RegiT> contOnRegs;       //registers that need to "if (reg) continue
238                                    //endif" after ENDLOOP(thisloop) continue on
239                                    //outerLoopOf(thisLoop).
240  LandInformation() : landBlk(NULL) {}
241};
242
243} //end of namespace llvmCFGStruct
244
245//===----------------------------------------------------------------------===//
246//
247// CFGStructurizer
248//
249//===----------------------------------------------------------------------===//
250
251namespace llvmCFGStruct
252{
253// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
254template<class PassT>
255class  CFGStructurizer
256{
257public:
258  typedef enum {
259    Not_SinglePath = 0,
260    SinglePath_InPath = 1,
261    SinglePath_NotInPath = 2
262  } PathToKind;
263
264public:
265  typedef typename PassT::InstructionType         InstrT;
266  typedef typename PassT::FunctionType            FuncT;
267  typedef typename PassT::DominatortreeType       DomTreeT;
268  typedef typename PassT::PostDominatortreeType   PostDomTreeT;
269  typedef typename PassT::DomTreeNodeType         DomTreeNodeT;
270  typedef typename PassT::LoopinfoType            LoopInfoT;
271
272  typedef GraphTraits<FuncT *>                    FuncGTraits;
273  //typedef FuncGTraits::nodes_iterator BlockIterator;
274  typedef typename FuncT::iterator                BlockIterator;
275
276  typedef typename FuncGTraits::NodeType          BlockT;
277  typedef GraphTraits<BlockT *>                   BlockGTraits;
278  typedef GraphTraits<Inverse<BlockT *> >         InvBlockGTraits;
279  //typedef BlockGTraits::succ_iterator InstructionIterator;
280  typedef typename BlockT::iterator               InstrIterator;
281
282  typedef CFGStructTraits<PassT>                  CFGTraits;
283  typedef BlockInformation<InstrT>                BlockInfo;
284  typedef std::map<BlockT *, BlockInfo *>         BlockInfoMap;
285
286  typedef int                                     RegiT;
287  typedef typename PassT::LoopType                LoopT;
288  typedef LandInformation<BlockT, InstrT, RegiT>  LoopLandInfo;
289        typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
290        //landing info for loop break
291  typedef SmallVector<BlockT *, 32>               BlockTSmallerVector;
292
293public:
294  CFGStructurizer();
295  ~CFGStructurizer();
296
297  /// Perform the CFG structurization
298  bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
299
300  /// Perform the CFG preparation
301  bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
302
303private:
304  void reversePredicateSetter(typename BlockT::iterator);
305  void   orderBlocks();
306  void   printOrderedBlocks(llvm::raw_ostream &OS);
307  int patternMatch(BlockT *CurBlock);
308  int patternMatchGroup(BlockT *CurBlock);
309
310  int serialPatternMatch(BlockT *CurBlock);
311  int ifPatternMatch(BlockT *CurBlock);
312  int switchPatternMatch(BlockT *CurBlock);
313  int loopendPatternMatch(BlockT *CurBlock);
314  int loopPatternMatch(BlockT *CurBlock);
315
316  int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
317  int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
318  //int loopWithoutBreak(BlockT *);
319
320  void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
321                        BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
322  void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
323                           BlockT *ContBlock, LoopT *contLoop);
324  bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
325  int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
326                       BlockT *FalseBlock);
327  int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
328                          BlockT *FalseBlock);
329  int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
330                              BlockT *FalseBlock, BlockT **LandBlockPtr);
331  void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
332                                   BlockT *FalseBlock, BlockT *LandBlock,
333                                   bool Detail = false);
334  PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
335                          bool AllowSideEntry = true);
336  BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
337                        bool AllowSideEntry = true);
338  int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
339  void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
340
341  void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
342                            BlockT *TrueBlock, BlockT *FalseBlock,
343                            BlockT *LandBlock);
344  void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
345  void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
346                           BlockT *ExitLandBlock, RegiT SetReg);
347  void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
348                           RegiT SetReg);
349  BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
350                                std::set<BlockT*> &ExitBlockSet,
351                                BlockT *ExitLandBlk);
352  BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
353                                BlockTSmallerVector &ExitingBlocks,
354                                BlockTSmallerVector &ExitBlocks);
355  BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
356  void removeUnconditionalBranch(BlockT *SrcBlock);
357  void removeRedundantConditionalBranch(BlockT *SrcBlock);
358  void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
359
360  void removeSuccessor(BlockT *SrcBlock);
361  BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
362  BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
363
364  void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
365                          InstrIterator InsertPos);
366
367  void recordSccnum(BlockT *SrcBlock, int SCCNum);
368  int getSCCNum(BlockT *srcBlk);
369
370  void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
371  bool isRetiredBlock(BlockT *SrcBlock);
372  bool isActiveLoophead(BlockT *CurBlock);
373  bool needMigrateBlock(BlockT *Block);
374
375  BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
376                              BlockTSmallerVector &exitBlocks,
377                              std::set<BlockT*> &ExitBlockSet);
378  void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
379  BlockT *getLoopLandBlock(LoopT *LoopRep);
380  LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
381
382  void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
383  void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
384  void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
385  void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
386  void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
387
388  bool hasBackEdge(BlockT *curBlock);
389  unsigned getLoopDepth  (LoopT *LoopRep);
390  int countActiveBlock(
391    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
392    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
393    BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
394  BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
395
396private:
397  DomTreeT *domTree;
398  PostDomTreeT *postDomTree;
399  LoopInfoT *loopInfo;
400  PassT *passRep;
401  FuncT *funcRep;
402
403  BlockInfoMap blockInfoMap;
404  LoopLandInfoMap loopLandInfoMap;
405  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
406  const AMDGPURegisterInfo *TRI;
407
408};  //template class CFGStructurizer
409
410template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
411  : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
412}
413
414template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
415  for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
416       E = blockInfoMap.end(); I != E; ++I) {
417    delete I->second;
418  }
419}
420
421template<class PassT>
422bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
423                                     const AMDGPURegisterInfo * tri) {
424  passRep = &pass;
425  funcRep = &func;
426  TRI = tri;
427
428  bool changed = false;
429  //func.RenumberBlocks();
430
431  //to do, if not reducible flow graph, make it so ???
432
433  if (DEBUGME) {
434        errs() << "AMDGPUCFGStructurizer::prepare\n";
435    //func.viewCFG();
436    //func.viewCFGOnly();
437    //func.dump();
438  }
439
440  //FIXME: gcc complains on this.
441  //domTree = &pass.getAnalysis<DomTreeT>();
442      //domTree = CFGTraits::getDominatorTree(pass);
443      //if (DEBUGME) {
444      //    domTree->print(errs());
445    //}
446
447  //FIXME: gcc complains on this.
448  //domTree = &pass.getAnalysis<DomTreeT>();
449      //postDomTree = CFGTraits::getPostDominatorTree(pass);
450      //if (DEBUGME) {
451      //   postDomTree->print(errs());
452    //}
453
454  //FIXME: gcc complains on this.
455  //loopInfo = &pass.getAnalysis<LoopInfoT>();
456  loopInfo = CFGTraits::getLoopInfo(pass);
457  if (DEBUGME) {
458    errs() << "LoopInfo:\n";
459    PrintLoopinfo(*loopInfo, errs());
460  }
461
462  orderBlocks();
463  if (DEBUGME) {
464    errs() << "Ordered blocks:\n";
465    printOrderedBlocks(errs());
466  }
467
468  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
469
470  for (typename LoopInfoT::iterator iter = loopInfo->begin(),
471       iterEnd = loopInfo->end();
472       iter != iterEnd; ++iter) {
473    LoopT* loopRep = (*iter);
474    BlockTSmallerVector exitingBlks;
475    loopRep->getExitingBlocks(exitingBlks);
476
477    if (exitingBlks.size() == 0) {
478      BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
479      if (dummyExitBlk != NULL)
480        retBlks.push_back(dummyExitBlk);
481    }
482  }
483
484  // Remove unconditional branch instr.
485  // Add dummy exit block iff there are multiple returns.
486
487  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
488       iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
489       iterBlk != iterEndBlk;
490       ++iterBlk) {
491    BlockT *curBlk = *iterBlk;
492    removeUnconditionalBranch(curBlk);
493    removeRedundantConditionalBranch(curBlk);
494    if (CFGTraits::isReturnBlock(curBlk)) {
495      retBlks.push_back(curBlk);
496    }
497    assert(curBlk->succ_size() <= 2);
498    //assert(curBlk->size() > 0);
499    //removeEmptyBlock(curBlk) ??
500  } //for
501
502  if (retBlks.size() >= 2) {
503    addDummyExitBlock(retBlks);
504    changed = true;
505  }
506
507  return changed;
508} //CFGStructurizer::prepare
509
510template<class PassT>
511bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
512    const AMDGPURegisterInfo * tri) {
513  passRep = &pass;
514  funcRep = &func;
515  TRI = tri;
516
517  //func.RenumberBlocks();
518
519  //Assume reducible CFG...
520  if (DEBUGME) {
521    errs() << "AMDGPUCFGStructurizer::run\n";
522    //errs() << func.getFunction()->getNameStr() << "\n";
523    func.viewCFG();
524    //func.viewCFGOnly();
525    //func.dump();
526  }
527
528#if 1
529  //FIXME: gcc complains on this.
530  //domTree = &pass.getAnalysis<DomTreeT>();
531  domTree = CFGTraits::getDominatorTree(pass);
532  if (DEBUGME) {
533    domTree->print(errs(), (const llvm::Module*)0);
534  }
535#endif
536
537  //FIXME: gcc complains on this.
538  //domTree = &pass.getAnalysis<DomTreeT>();
539  postDomTree = CFGTraits::getPostDominatorTree(pass);
540  if (DEBUGME) {
541    postDomTree->print(errs());
542  }
543
544  //FIXME: gcc complains on this.
545  //loopInfo = &pass.getAnalysis<LoopInfoT>();
546  loopInfo = CFGTraits::getLoopInfo(pass);
547  if (DEBUGME) {
548    errs() << "LoopInfo:\n";
549    PrintLoopinfo(*loopInfo, errs());
550  }
551
552  orderBlocks();
553//#define STRESSTEST
554#ifdef STRESSTEST
555  //Use the worse block ordering to test the algorithm.
556  ReverseVector(orderedBlks);
557#endif
558
559  if (DEBUGME) {
560    errs() << "Ordered blocks:\n";
561    printOrderedBlocks(errs());
562  }
563  int numIter = 0;
564  bool finish = false;
565  BlockT *curBlk;
566  bool makeProgress = false;
567  int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
568                                        orderedBlks.end());
569
570  do {
571    ++numIter;
572    if (DEBUGME) {
573      errs() << "numIter = " << numIter
574             << ", numRemaintedBlk = " << numRemainedBlk << "\n";
575    }
576
577    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
578      iterBlk = orderedBlks.begin();
579    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
580      iterBlkEnd = orderedBlks.end();
581
582    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
583      sccBeginIter = iterBlk;
584    BlockT *sccBeginBlk = NULL;
585    int sccNumBlk = 0;  // The number of active blocks, init to a
586                        // maximum possible number.
587    int sccNumIter;     // Number of iteration in this SCC.
588
589    while (iterBlk != iterBlkEnd) {
590      curBlk = *iterBlk;
591
592      if (sccBeginBlk == NULL) {
593        sccBeginIter = iterBlk;
594        sccBeginBlk = curBlk;
595        sccNumIter = 0;
596        sccNumBlk = numRemainedBlk; // Init to maximum possible number.
597        if (DEBUGME) {
598              errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
599              errs() << "\n";
600        }
601      }
602
603      if (!isRetiredBlock(curBlk)) {
604        patternMatch(curBlk);
605      }
606
607      ++iterBlk;
608
609      bool contNextScc = true;
610      if (iterBlk == iterBlkEnd
611          || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
612        // Just finish one scc.
613        ++sccNumIter;
614        int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
615        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
616          if (DEBUGME) {
617            errs() << "Can't reduce SCC " << getSCCNum(curBlk)
618                   << ", sccNumIter = " << sccNumIter;
619            errs() << "doesn't make any progress\n";
620          }
621          contNextScc = true;
622        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
623          sccNumBlk = sccRemainedNumBlk;
624          iterBlk = sccBeginIter;
625          contNextScc = false;
626          if (DEBUGME) {
627            errs() << "repeat processing SCC" << getSCCNum(curBlk)
628                   << "sccNumIter = " << sccNumIter << "\n";
629            func.viewCFG();
630            //func.viewCFGOnly();
631          }
632        } else {
633          // Finish the current scc.
634          contNextScc = true;
635        }
636      } else {
637        // Continue on next component in the current scc.
638        contNextScc = false;
639      }
640
641      if (contNextScc) {
642        sccBeginBlk = NULL;
643      }
644    } //while, "one iteration" over the function.
645
646    BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
647    if (entryBlk->succ_size() == 0) {
648      finish = true;
649      if (DEBUGME) {
650        errs() << "Reduce to one block\n";
651      }
652    } else {
653      int newnumRemainedBlk
654        = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
655      // consider cloned blocks ??
656      if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
657        makeProgress = true;
658        numRemainedBlk = newnumRemainedBlk;
659      } else {
660        makeProgress = false;
661        if (DEBUGME) {
662          errs() << "No progress\n";
663        }
664      }
665    }
666  } while (!finish && makeProgress);
667
668  // Misc wrap up to maintain the consistency of the Function representation.
669  CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
670
671  // Detach retired Block, release memory.
672  for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
673       iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
674    if ((*iterMap).second && (*iterMap).second->isRetired) {
675      assert(((*iterMap).first)->getNumber() != -1);
676      if (DEBUGME) {
677        errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
678      }
679      (*iterMap).first->eraseFromParent();  //Remove from the parent Function.
680    }
681    delete (*iterMap).second;
682  }
683  blockInfoMap.clear();
684
685  // clear loopLandInfoMap
686  for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
687       iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
688    delete (*iterMap).second;
689  }
690  loopLandInfoMap.clear();
691
692  if (DEBUGME) {
693    func.viewCFG();
694    //func.dump();
695  }
696
697  if (!finish) {
698    assert(!"IRREDUCIBL_CF");
699  }
700
701  return true;
702} //CFGStructurizer::run
703
704/// Print the ordered Blocks.
705///
706template<class PassT>
707void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
708  size_t i = 0;
709  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
710      iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
711       iterBlk != iterBlkEnd;
712       ++iterBlk, ++i) {
713    os << "BB" << (*iterBlk)->getNumber();
714    os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
715    if (i != 0 && i % 10 == 0) {
716      os << "\n";
717    } else {
718      os << " ";
719    }
720  }
721} //printOrderedBlocks
722
723/// Compute the reversed DFS post order of Blocks
724///
725template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
726  int sccNum = 0;
727  BlockT *bb;
728  for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
729       sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
730    std::vector<BlockT *> &sccNext = *sccIter;
731    for (typename std::vector<BlockT *>::const_iterator
732         blockIter = sccNext.begin(), blockEnd = sccNext.end();
733         blockIter != blockEnd; ++blockIter) {
734      bb = *blockIter;
735      orderedBlks.push_back(bb);
736      recordSccnum(bb, sccNum);
737    }
738  }
739
740  //walk through all the block in func to check for unreachable
741  for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
742       blockEnd1 = FuncGTraits::nodes_end(funcRep);
743       blockIter1 != blockEnd1; ++blockIter1) {
744    BlockT *bb = &(*blockIter1);
745    sccNum = getSCCNum(bb);
746    if (sccNum == INVALIDSCCNUM) {
747      errs() << "unreachable block BB" << bb->getNumber() << "\n";
748    }
749  } //end of for
750} //orderBlocks
751
752template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
753  int numMatch = 0;
754  int curMatch;
755
756  if (DEBUGME) {
757        errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
758  }
759
760  while ((curMatch = patternMatchGroup(curBlk)) > 0) {
761    numMatch += curMatch;
762  }
763
764  if (DEBUGME) {
765        errs() << "End patternMatch BB" << curBlk->getNumber()
766      << ", numMatch = " << numMatch << "\n";
767  }
768
769  return numMatch;
770} //patternMatch
771
772template<class PassT>
773int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
774  int numMatch = 0;
775  numMatch += serialPatternMatch(curBlk);
776  numMatch += ifPatternMatch(curBlk);
777  //numMatch += switchPatternMatch(curBlk);
778  numMatch += loopendPatternMatch(curBlk);
779  numMatch += loopPatternMatch(curBlk);
780  return numMatch;
781}//patternMatchGroup
782
783template<class PassT>
784int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
785  if (curBlk->succ_size() != 1) {
786    return 0;
787  }
788
789  BlockT *childBlk = *curBlk->succ_begin();
790  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
791    return 0;
792  }
793
794  mergeSerialBlock(curBlk, childBlk);
795  ++numSerialPatternMatch;
796  return 1;
797} //serialPatternMatch
798
799template<class PassT>
800int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
801  //two edges
802  if (curBlk->succ_size() != 2) {
803    return 0;
804  }
805
806  if (hasBackEdge(curBlk)) {
807    return 0;
808  }
809
810  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
811  if (branchInstr == NULL) {
812    return 0;
813  }
814
815  assert(CFGTraits::isCondBranch(branchInstr));
816
817  BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
818  BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
819  BlockT *landBlk;
820  int cloned = 0;
821
822  // TODO: Simplify
823  if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
824    && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
825    landBlk = *trueBlk->succ_begin();
826  } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
827    landBlk = NULL;
828  } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
829    landBlk = falseBlk;
830    falseBlk = NULL;
831  } else if (falseBlk->succ_size() == 1
832             && *falseBlk->succ_begin() == trueBlk) {
833    landBlk = trueBlk;
834    trueBlk = NULL;
835  } else if (falseBlk->succ_size() == 1
836             && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
837    landBlk = *falseBlk->succ_begin();
838  } else if (trueBlk->succ_size() == 1
839    && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
840    landBlk = *trueBlk->succ_begin();
841  } else {
842    return handleJumpintoIf(curBlk, trueBlk, falseBlk);
843  }
844
845  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
846  // new BB created for landBlk==NULL may introduce new challenge to the
847  // reduction process.
848  if (landBlk != NULL &&
849      ((trueBlk && trueBlk->pred_size() > 1)
850      || (falseBlk && falseBlk->pred_size() > 1))) {
851     cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
852  }
853
854  if (trueBlk && trueBlk->pred_size() > 1) {
855    trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
856    ++cloned;
857  }
858
859  if (falseBlk && falseBlk->pred_size() > 1) {
860    falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
861    ++cloned;
862  }
863
864  mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
865
866  ++numIfPatternMatch;
867
868  numClonedBlock += cloned;
869
870  return 1 + cloned;
871} //ifPatternMatch
872
873template<class PassT>
874int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
875  return 0;
876} //switchPatternMatch
877
878template<class PassT>
879int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
880  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
881  typename std::vector<LoopT *> nestedLoops;
882  while (loopRep) {
883    nestedLoops.push_back(loopRep);
884    loopRep = loopRep->getParentLoop();
885  }
886
887  if (nestedLoops.size() == 0) {
888    return 0;
889  }
890
891  // Process nested loop outside->inside, so "continue" to a outside loop won't
892  // be mistaken as "break" of the current loop.
893  int num = 0;
894  for (typename std::vector<LoopT *>::reverse_iterator
895       iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
896       iter != iterEnd; ++iter) {
897    loopRep = *iter;
898
899    if (getLoopLandBlock(loopRep) != NULL) {
900      continue;
901    }
902
903    BlockT *loopHeader = loopRep->getHeader();
904
905    int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
906
907    if (numBreak == -1) {
908      break;
909    }
910
911    int numCont = loopcontPatternMatch(loopRep, loopHeader);
912    num += numBreak + numCont;
913  }
914
915  return num;
916} //loopendPatternMatch
917
918template<class PassT>
919int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
920  if (curBlk->succ_size() != 0) {
921    return 0;
922  }
923
924  int numLoop = 0;
925  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
926  while (loopRep && loopRep->getHeader() == curBlk) {
927    LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
928    if (loopLand) {
929      BlockT *landBlk = loopLand->landBlk;
930      assert(landBlk);
931      if (!isRetiredBlock(landBlk)) {
932        mergeLooplandBlock(curBlk, loopLand);
933        ++numLoop;
934      }
935    }
936    loopRep = loopRep->getParentLoop();
937  }
938
939  numLoopPatternMatch += numLoop;
940
941  return numLoop;
942} //loopPatternMatch
943
944template<class PassT>
945int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
946                                                  BlockT *loopHeader) {
947  BlockTSmallerVector exitingBlks;
948  loopRep->getExitingBlocks(exitingBlks);
949
950  if (DEBUGME) {
951    errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
952  }
953
954  if (exitingBlks.size() == 0) {
955    setLoopLandBlock(loopRep);
956    return 0;
957  }
958
959  // Compute the corresponding exitBlks and exit block set.
960  BlockTSmallerVector exitBlks;
961  std::set<BlockT *> exitBlkSet;
962  for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
963       iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
964    BlockT *exitingBlk = *iter;
965    BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
966    exitBlks.push_back(exitBlk);
967    exitBlkSet.insert(exitBlk);  //non-duplicate insert
968  }
969
970  assert(exitBlkSet.size() > 0);
971  assert(exitBlks.size() == exitingBlks.size());
972
973  if (DEBUGME) {
974    errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
975  }
976
977  // Find exitLandBlk.
978  BlockT *exitLandBlk = NULL;
979  int numCloned = 0;
980  int numSerial = 0;
981
982  if (exitBlkSet.size() == 1)
983  {
984    exitLandBlk = *exitBlkSet.begin();
985  } else {
986    exitLandBlk = findNearestCommonPostDom(exitBlkSet);
987
988    if (exitLandBlk == NULL) {
989      return -1;
990    }
991
992    bool allInPath = true;
993    bool allNotInPath = true;
994    for (typename std::set<BlockT*>::const_iterator
995         iter = exitBlkSet.begin(),
996         iterEnd = exitBlkSet.end();
997         iter != iterEnd; ++iter) {
998      BlockT *exitBlk = *iter;
999
1000      PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
1001      if (DEBUGME) {
1002        errs() << "BB" << exitBlk->getNumber()
1003               << " to BB" << exitLandBlk->getNumber() << " PathToKind="
1004               << pathKind << "\n";
1005      }
1006
1007      allInPath = allInPath && (pathKind == SinglePath_InPath);
1008      allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
1009
1010      if (!allInPath && !allNotInPath) {
1011        if (DEBUGME) {
1012              errs() << "singlePath check fail\n";
1013        }
1014        return -1;
1015      }
1016    } // check all exit blocks
1017
1018    if (allNotInPath) {
1019#if 1
1020
1021      // TODO: Simplify, maybe separate function?
1022      //funcRep->viewCFG();
1023      LoopT *parentLoopRep = loopRep->getParentLoop();
1024      BlockT *parentLoopHeader = NULL;
1025      if (parentLoopRep)
1026        parentLoopHeader = parentLoopRep->getHeader();
1027
1028      if (exitLandBlk == parentLoopHeader &&
1029          (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
1030                                               loopRep,
1031                                               exitBlkSet,
1032                                               exitLandBlk)) != NULL) {
1033        if (DEBUGME) {
1034          errs() << "relocateLoopcontBlock success\n";
1035        }
1036      } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
1037                                                      exitingBlks,
1038                                                      exitBlks)) != NULL) {
1039        if (DEBUGME) {
1040          errs() << "insertEndbranchBlock success\n";
1041        }
1042      } else {
1043        if (DEBUGME) {
1044          errs() << "loop exit fail\n";
1045        }
1046        return -1;
1047      }
1048#else
1049      return -1;
1050#endif
1051    }
1052
1053    // Handle side entry to exit path.
1054    exitBlks.clear();
1055    exitBlkSet.clear();
1056    for (typename BlockTSmallerVector::iterator iterExiting =
1057           exitingBlks.begin(),
1058         iterExitingEnd = exitingBlks.end();
1059         iterExiting != iterExitingEnd; ++iterExiting) {
1060      BlockT *exitingBlk = *iterExiting;
1061      BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
1062      BlockT *newExitBlk = exitBlk;
1063
1064      if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
1065        newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
1066        ++numCloned;
1067      }
1068
1069      numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
1070
1071      exitBlks.push_back(newExitBlk);
1072      exitBlkSet.insert(newExitBlk);
1073    }
1074
1075    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1076         iterExitEnd = exitBlks.end();
1077         iterExit != iterExitEnd; ++iterExit) {
1078      BlockT *exitBlk = *iterExit;
1079      numSerial += serialPatternMatch(exitBlk);
1080    }
1081
1082    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1083         iterExitEnd = exitBlks.end();
1084         iterExit != iterExitEnd; ++iterExit) {
1085      BlockT *exitBlk = *iterExit;
1086      if (exitBlk->pred_size() > 1) {
1087        if (exitBlk != exitLandBlk) {
1088          return -1;
1089        }
1090      } else {
1091        if (exitBlk != exitLandBlk &&
1092            (exitBlk->succ_size() != 1 ||
1093            *exitBlk->succ_begin() != exitLandBlk)) {
1094          return -1;
1095        }
1096      }
1097    }
1098  } // else
1099
1100  // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
1101  exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
1102
1103  // Fold break into the breaking block. Leverage across level breaks.
1104  assert(exitingBlks.size() == exitBlks.size());
1105  for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
1106       iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
1107       iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
1108    BlockT *exitBlk = *iterExit;
1109    BlockT *exitingBlk = *iterExiting;
1110    assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
1111    LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
1112    handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
1113  }
1114
1115  int numBreak = static_cast<int>(exitingBlks.size());
1116  numLoopbreakPatternMatch += numBreak;
1117  numClonedBlock += numCloned;
1118  return numBreak + numSerial + numCloned;
1119} //loopbreakPatternMatch
1120
1121template<class PassT>
1122int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
1123                                                 BlockT *loopHeader) {
1124  int numCont = 0;
1125  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
1126  for (typename InvBlockGTraits::ChildIteratorType iter =
1127       InvBlockGTraits::child_begin(loopHeader),
1128       iterEnd = InvBlockGTraits::child_end(loopHeader);
1129       iter != iterEnd; ++iter) {
1130    BlockT *curBlk = *iter;
1131    if (loopRep->contains(curBlk)) {
1132      handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
1133                          loopHeader, loopRep);
1134      contBlk.push_back(curBlk);
1135      ++numCont;
1136    }
1137  }
1138
1139  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
1140       iter = contBlk.begin(), iterEnd = contBlk.end();
1141       iter != iterEnd; ++iter) {
1142    (*iter)->removeSuccessor(loopHeader);
1143  }
1144
1145  numLoopcontPatternMatch += numCont;
1146
1147  return numCont;
1148} //loopcontPatternMatch
1149
1150
1151template<class PassT>
1152bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1153                                                         BlockT *src2Blk) {
1154  // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1155  // same loop with LoopLandInfo without explicitly keeping track of
1156  // loopContBlks and loopBreakBlks, this is a method to get the information.
1157  //
1158  if (src1Blk->succ_size() == 0) {
1159    LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1160    if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1161      LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1162      if (theEntry != NULL) {
1163        if (DEBUGME) {
1164          errs() << "isLoopContBreakBlock yes src1 = BB"
1165                 << src1Blk->getNumber()
1166                 << " src2 = BB" << src2Blk->getNumber() << "\n";
1167        }
1168        return true;
1169      }
1170    }
1171  }
1172  return false;
1173}  //isSameloopDetachedContbreak
1174
1175template<class PassT>
1176int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1177                                             BlockT *trueBlk,
1178                                             BlockT *falseBlk) {
1179  int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1180  if (num == 0) {
1181    if (DEBUGME) {
1182      errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1183    }
1184    num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1185  }
1186  return num;
1187}
1188
1189template<class PassT>
1190int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
1191                                                BlockT *trueBlk,
1192                                                BlockT *falseBlk) {
1193  int num = 0;
1194  BlockT *downBlk;
1195
1196  //trueBlk could be the common post dominator
1197  downBlk = trueBlk;
1198
1199  if (DEBUGME) {
1200    errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1201           << " true = BB" << trueBlk->getNumber()
1202           << ", numSucc=" << trueBlk->succ_size()
1203           << " false = BB" << falseBlk->getNumber() << "\n";
1204  }
1205
1206  while (downBlk) {
1207    if (DEBUGME) {
1208      errs() << "check down = BB" << downBlk->getNumber();
1209    }
1210
1211    if (//postDomTree->dominates(downBlk, falseBlk) &&
1212        singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1213      if (DEBUGME) {
1214        errs() << " working\n";
1215      }
1216
1217      num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1218      num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1219
1220      numClonedBlock += num;
1221      num += serialPatternMatch(*headBlk->succ_begin());
1222      num += serialPatternMatch(*(++headBlk->succ_begin()));
1223      num += ifPatternMatch(headBlk);
1224      assert(num > 0); //
1225
1226      break;
1227    }
1228    if (DEBUGME) {
1229      errs() << " not working\n";
1230    }
1231    downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1232  } // walk down the postDomTree
1233
1234  return num;
1235} //handleJumpintoIf
1236
1237template<class PassT>
1238void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1239                                                         BlockT *trueBlk,
1240                                                         BlockT *falseBlk,
1241                                                         BlockT *landBlk,
1242                                                         bool detail) {
1243  errs() << "head = BB" << headBlk->getNumber()
1244         << " size = " << headBlk->size();
1245  if (detail) {
1246    errs() << "\n";
1247    headBlk->print(errs());
1248    errs() << "\n";
1249  }
1250
1251  if (trueBlk) {
1252    errs() << ", true = BB" << trueBlk->getNumber() << " size = "
1253           << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1254    if (detail) {
1255      errs() << "\n";
1256      trueBlk->print(errs());
1257      errs() << "\n";
1258    }
1259  }
1260  if (falseBlk) {
1261    errs() << ", false = BB" << falseBlk->getNumber() << " size = "
1262           << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1263    if (detail) {
1264      errs() << "\n";
1265      falseBlk->print(errs());
1266      errs() << "\n";
1267    }
1268  }
1269  if (landBlk) {
1270    errs() << ", land = BB" << landBlk->getNumber() << " size = "
1271           << landBlk->size() << " numPred = " << landBlk->pred_size();
1272    if (detail) {
1273      errs() << "\n";
1274      landBlk->print(errs());
1275      errs() << "\n";
1276    }
1277  }
1278
1279    errs() << "\n";
1280} //showImproveSimpleJumpintoIf
1281
1282template<class PassT>
1283int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
1284                                                    BlockT *trueBlk,
1285                                                    BlockT *falseBlk,
1286                                                    BlockT **plandBlk) {
1287  bool migrateTrue = false;
1288  bool migrateFalse = false;
1289
1290  BlockT *landBlk = *plandBlk;
1291
1292  assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1293         && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1294
1295  if (trueBlk == falseBlk) {
1296    return 0;
1297  }
1298
1299#if 0
1300  if (DEBUGME) {
1301    errs() << "improveSimpleJumpintoIf: ";
1302    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1303  }
1304#endif
1305
1306  // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
1307  // May consider the # landBlk->pred_size() as it represents the number of
1308  // assignment initReg = .. needed to insert.
1309  migrateTrue = needMigrateBlock(trueBlk);
1310  migrateFalse = needMigrateBlock(falseBlk);
1311
1312  if (!migrateTrue && !migrateFalse) {
1313    return 0;
1314  }
1315
1316  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1317  // have more than one predecessors.  without doing this, its predecessor
1318  // rather than headBlk will have undefined value in initReg.
1319  if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1320    migrateTrue = true;
1321  }
1322  if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1323    migrateFalse = true;
1324  }
1325
1326  if (DEBUGME) {
1327    errs() << "before improveSimpleJumpintoIf: ";
1328    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1329    //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1330  }
1331
1332  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1333  //
1334  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1335  //      {initReg = 0; org falseBlk branch }
1336  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1337  //      => org landBlk
1338  //      if landBlk->pred_size() > 2, put the about if-else inside
1339  //      if (initReg !=2) {...}
1340  //
1341  // add initReg = initVal to headBlk
1342
1343  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1344  unsigned initReg =
1345    funcRep->getRegInfo().createVirtualRegister(I32RC);
1346  if (!migrateTrue || !migrateFalse) {
1347    int initVal = migrateTrue ? 0 : 1;
1348    CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
1349  }
1350
1351  int numNewBlk = 0;
1352
1353  if (landBlk == NULL) {
1354    landBlk = funcRep->CreateMachineBasicBlock();
1355    funcRep->push_back(landBlk);  //insert to function
1356
1357    if (trueBlk) {
1358      trueBlk->addSuccessor(landBlk);
1359    } else {
1360      headBlk->addSuccessor(landBlk);
1361    }
1362
1363    if (falseBlk) {
1364      falseBlk->addSuccessor(landBlk);
1365    } else {
1366      headBlk->addSuccessor(landBlk);
1367    }
1368
1369    numNewBlk ++;
1370  }
1371
1372  bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
1373
1374  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1375  typename BlockT::iterator insertPos =
1376    CFGTraits::getInstrPos
1377    (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
1378
1379  if (landBlkHasOtherPred) {
1380    unsigned immReg =
1381      funcRep->getRegInfo().createVirtualRegister(I32RC);
1382    CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
1383    unsigned cmpResReg =
1384      funcRep->getRegInfo().createVirtualRegister(I32RC);
1385
1386    CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
1387                                        initReg, immReg);
1388    CFGTraits::insertCondBranchBefore(landBlk, insertPos,
1389                                      AMDGPU::IF_LOGICALZ_i32, passRep,
1390                                      cmpResReg, DebugLoc());
1391  }
1392
1393  CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
1394                                    passRep, initReg, DebugLoc());
1395
1396  if (migrateTrue) {
1397    migrateInstruction(trueBlk, landBlk, insertPos);
1398    // need to uncondionally insert the assignment to ensure a path from its
1399    // predecessor rather than headBlk has valid value in initReg if
1400    // (initVal != 1).
1401    CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
1402  }
1403  CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
1404
1405  if (migrateFalse) {
1406    migrateInstruction(falseBlk, landBlk, insertPos);
1407    // need to uncondionally insert the assignment to ensure a path from its
1408    // predecessor rather than headBlk has valid value in initReg if
1409    // (initVal != 0)
1410    CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
1411  }
1412  //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1413
1414  if (landBlkHasOtherPred) {
1415    // add endif
1416    CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1417
1418    // put initReg = 2 to other predecessors of landBlk
1419    for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
1420         predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
1421         ++predIter) {
1422      BlockT *curBlk = *predIter;
1423      if (curBlk != trueBlk && curBlk != falseBlk) {
1424        CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
1425      }
1426    } //for
1427  }
1428  if (DEBUGME) {
1429    errs() << "result from improveSimpleJumpintoIf: ";
1430    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1431    //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1432  }
1433
1434  // update landBlk
1435  *plandBlk = landBlk;
1436
1437  return numNewBlk;
1438} //improveSimpleJumpintoIf
1439
1440template<class PassT>
1441void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1442                                              LoopT *exitingLoop,
1443                                             BlockT *exitBlk,
1444                                              LoopT *exitLoop,
1445                                             BlockT *landBlk) {
1446  if (DEBUGME) {
1447    errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1448           << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1449  }
1450  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1451
1452  RegiT initReg = INVALIDREGNUM;
1453  if (exitingLoop != exitLoop) {
1454    initReg = static_cast<int>
1455      (funcRep->getRegInfo().createVirtualRegister(I32RC));
1456    assert(initReg != INVALIDREGNUM);
1457    addLoopBreakInitReg(exitLoop, initReg);
1458    while (exitingLoop != exitLoop && exitingLoop) {
1459      addLoopBreakOnReg(exitingLoop, initReg);
1460      exitingLoop = exitingLoop->getParentLoop();
1461    }
1462    assert(exitingLoop == exitLoop);
1463  }
1464
1465  mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1466
1467} //handleLoopbreak
1468
1469template<class PassT>
1470void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1471                                                  LoopT *contingLoop,
1472                                                 BlockT *contBlk,
1473                                                  LoopT *contLoop) {
1474  if (DEBUGME) {
1475    errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1476           << " header = BB" << contBlk->getNumber() << "\n";
1477
1478    errs() << "Trying to continue loop-depth = "
1479           << getLoopDepth(contLoop)
1480           << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1481  }
1482
1483  RegiT initReg = INVALIDREGNUM;
1484  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1485  if (contingLoop != contLoop) {
1486    initReg = static_cast<int>
1487      (funcRep->getRegInfo().createVirtualRegister(I32RC));
1488    assert(initReg != INVALIDREGNUM);
1489    addLoopContInitReg(contLoop, initReg);
1490    while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1491      addLoopBreakOnReg(contingLoop, initReg);  //not addLoopContOnReg
1492      contingLoop = contingLoop->getParentLoop();
1493    }
1494    assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1495    addLoopContOnReg(contingLoop, initReg);
1496  }
1497
1498  settleLoopcontBlock(contingBlk, contBlk, initReg);
1499  //contingBlk->removeSuccessor(loopHeader);
1500} //handleLoopcontBlock
1501
1502template<class PassT>
1503void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
1504  if (DEBUGME) {
1505    errs() << "serialPattern BB" << dstBlk->getNumber()
1506           << " <= BB" << srcBlk->getNumber() << "\n";
1507  }
1508  //removeUnconditionalBranch(dstBlk);
1509  dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
1510
1511  dstBlk->removeSuccessor(srcBlk);
1512  CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
1513
1514  removeSuccessor(srcBlk);
1515  retireBlock(dstBlk, srcBlk);
1516} //mergeSerialBlock
1517
1518template<class PassT>
1519void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
1520                                                  BlockT *curBlk,
1521                                                  BlockT *trueBlk,
1522                                                  BlockT *falseBlk,
1523                                                  BlockT *landBlk) {
1524  if (DEBUGME) {
1525    errs() << "ifPattern BB" << curBlk->getNumber();
1526    errs() << "{  ";
1527    if (trueBlk) {
1528      errs() << "BB" << trueBlk->getNumber();
1529    }
1530    errs() << "  } else ";
1531    errs() << "{  ";
1532    if (falseBlk) {
1533      errs() << "BB" << falseBlk->getNumber();
1534    }
1535    errs() << "  }\n ";
1536    errs() << "landBlock: ";
1537    if (landBlk == NULL) {
1538      errs() << "NULL";
1539    } else {
1540      errs() << "BB" << landBlk->getNumber();
1541    }
1542    errs() << "\n";
1543  }
1544
1545  int oldOpcode = branchInstr->getOpcode();
1546  DebugLoc branchDL = branchInstr->getDebugLoc();
1547
1548//    transform to
1549//    if cond
1550//       trueBlk
1551//    else
1552//       falseBlk
1553//    endif
1554//    landBlk
1555
1556  typename BlockT::iterator branchInstrPos =
1557    CFGTraits::getInstrPos(curBlk, branchInstr);
1558  CFGTraits::insertCondBranchBefore(branchInstrPos,
1559                                    CFGTraits::getBranchNzeroOpcode(oldOpcode),
1560                                    passRep,
1561									branchDL);
1562
1563  if (trueBlk) {
1564    curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
1565    curBlk->removeSuccessor(trueBlk);
1566    if (landBlk && trueBlk->succ_size()!=0) {
1567      trueBlk->removeSuccessor(landBlk);
1568    }
1569    retireBlock(curBlk, trueBlk);
1570  }
1571  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
1572
1573  if (falseBlk) {
1574    curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
1575                   falseBlk->end());
1576    curBlk->removeSuccessor(falseBlk);
1577    if (landBlk && falseBlk->succ_size() != 0) {
1578      falseBlk->removeSuccessor(landBlk);
1579    }
1580    retireBlock(curBlk, falseBlk);
1581  }
1582  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1583
1584  //curBlk->remove(branchInstrPos);
1585  branchInstr->eraseFromParent();
1586
1587  if (landBlk && trueBlk && falseBlk) {
1588    curBlk->addSuccessor(landBlk);
1589  }
1590
1591} //mergeIfthenelseBlock
1592
1593template<class PassT>
1594void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1595                                                LoopLandInfo *loopLand) {
1596  BlockT *landBlk = loopLand->landBlk;
1597
1598  if (DEBUGME) {
1599    errs() << "loopPattern header = BB" << dstBlk->getNumber()
1600           << " land = BB" << landBlk->getNumber() << "\n";
1601  }
1602
1603  // Loop contInitRegs are init at the beginning of the loop.
1604  for (typename std::set<RegiT>::const_iterator iter =
1605         loopLand->contInitRegs.begin(),
1606       iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1607    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1608  }
1609
1610  /* we last inserterd the DebugLoc in the
1611   * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1612   * search for the DebugLoc in the that statement.
1613   * if not found, we have to insert the empty/default DebugLoc */
1614  InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1615  DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1616
1617  CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1618  // Loop breakInitRegs are init before entering the loop.
1619  for (typename std::set<RegiT>::const_iterator iter =
1620         loopLand->breakInitRegs.begin(),
1621       iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
1622  {
1623    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1624  }
1625  // Loop endbranchInitRegs are init before entering the loop.
1626  for (typename std::set<RegiT>::const_iterator iter =
1627         loopLand->endbranchInitRegs.begin(),
1628       iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1629    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1630  }
1631
1632  /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1633   * search for the DebugLoc in the continue statement.
1634   * if not found, we have to insert the empty/default DebugLoc */
1635  InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1636  DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1637
1638  CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1639  // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1640  // loop.
1641  for (typename std::set<RegiT>::const_iterator iter =
1642         loopLand->breakOnRegs.begin(),
1643       iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1644    CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
1645                                   *iter);
1646  }
1647
1648  // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1649  // loop.
1650  for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1651       iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1652    CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1653                                   passRep, *iter);
1654  }
1655
1656  dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1657
1658  for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1659       iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1660    dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of.
1661  }
1662
1663  removeSuccessor(landBlk);
1664  retireBlock(dstBlk, landBlk);
1665} //mergeLooplandBlock
1666
1667template<class PassT>
1668void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
1669{
1670  while (I--) {
1671    if (I->getOpcode() == AMDGPU::PRED_X) {
1672      switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
1673      case OPCODE_IS_ZERO_INT:
1674        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
1675        return;
1676      case OPCODE_IS_NOT_ZERO_INT:
1677        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
1678        return;
1679      case OPCODE_IS_ZERO:
1680        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
1681        return;
1682      case OPCODE_IS_NOT_ZERO:
1683        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
1684        return;
1685      default:
1686        assert(0 && "PRED_X Opcode invalid!");
1687      }
1688    }
1689  }
1690}
1691
1692template<class PassT>
1693void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1694                                                 BlockT *exitBlk,
1695                                                 BlockT *exitLandBlk,
1696                                                 RegiT  setReg) {
1697  if (DEBUGME) {
1698    errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1699           << " exit = BB" << exitBlk->getNumber()
1700           << " land = BB" << exitLandBlk->getNumber() << "\n";
1701  }
1702
1703  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1704  assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1705
1706  DebugLoc DL = branchInstr->getDebugLoc();
1707
1708  BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1709  int oldOpcode = branchInstr->getOpcode();
1710
1711  //    transform exitingBlk to
1712  //    if ( ) {
1713  //       exitBlk (if exitBlk != exitLandBlk)
1714  //       setReg = 1
1715  //       break
1716  //    }endif
1717  //    successor = {orgSuccessor(exitingBlk) - exitBlk}
1718
1719  typename BlockT::iterator branchInstrPos =
1720    CFGTraits::getInstrPos(exitingBlk, branchInstr);
1721
1722  if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1723    //break_logical
1724
1725    if (trueBranch != exitBlk) {
1726      reversePredicateSetter(branchInstrPos);
1727    }
1728    int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
1729    CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1730  } else {
1731    if (trueBranch != exitBlk) {
1732      reversePredicateSetter(branchInstr);
1733    }
1734    int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
1735    CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1736    if (exitBlk != exitLandBlk) {
1737      //splice is insert-before ...
1738      exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1739                         exitBlk->end());
1740    }
1741    if (setReg != INVALIDREGNUM) {
1742      CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1743    }
1744    CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1745    CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1746  } //if_logical
1747
1748  //now branchInst can be erase safely
1749  //exitingBlk->eraseFromParent(branchInstr);
1750  branchInstr->eraseFromParent();
1751
1752  //now take care of successors, retire blocks
1753  exitingBlk->removeSuccessor(exitBlk);
1754  if (exitBlk != exitLandBlk) {
1755    //splice is insert-before ...
1756    exitBlk->removeSuccessor(exitLandBlk);
1757    retireBlock(exitingBlk, exitBlk);
1758  }
1759
1760} //mergeLoopbreakBlock
1761
1762template<class PassT>
1763void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
1764                                                 BlockT *contBlk,
1765                                                 RegiT   setReg) {
1766  if (DEBUGME) {
1767    errs() << "settleLoopcontBlock conting = BB"
1768           << contingBlk->getNumber()
1769           << ", cont = BB" << contBlk->getNumber() << "\n";
1770  }
1771
1772  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
1773  if (branchInstr) {
1774    assert(CFGTraits::isCondBranch(branchInstr));
1775    typename BlockT::iterator branchInstrPos =
1776      CFGTraits::getInstrPos(contingBlk, branchInstr);
1777    BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1778    int oldOpcode = branchInstr->getOpcode();
1779	DebugLoc DL = branchInstr->getDebugLoc();
1780
1781    //    transform contingBlk to
1782    //     if () {
1783    //          move instr after branchInstr
1784    //          continue
1785    //        or
1786    //          setReg = 1
1787    //          break
1788    //     }endif
1789    //     successor = {orgSuccessor(contingBlk) - loopHeader}
1790
1791    bool useContinueLogical =
1792      (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
1793
1794    if (useContinueLogical == false)
1795    {
1796      int branchOpcode =
1797        trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1798                              : CFGTraits::getBranchZeroOpcode(oldOpcode);
1799
1800      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1801
1802      if (setReg != INVALIDREGNUM) {
1803        CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1804        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1805        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
1806      } else {
1807        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1808        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
1809      }
1810
1811      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
1812    } else {
1813      int branchOpcode =
1814        trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
1815                              : CFGTraits::getContinueZeroOpcode(oldOpcode);
1816
1817      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1818    }
1819
1820    //contingBlk->eraseFromParent(branchInstr);
1821    branchInstr->eraseFromParent();
1822  } else {
1823    /* if we've arrived here then we've already erased the branch instruction
1824	 * travel back up the basic block to see the last reference of our debug location
1825	 * we've just inserted that reference here so it should be representative */
1826    if (setReg != INVALIDREGNUM) {
1827      CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
1828      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1829      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1830    } else {
1831      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1832      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1833    }
1834  } //else
1835
1836} //settleLoopcontBlock
1837
1838// BBs in exitBlkSet are determined as in break-path for loopRep,
1839// before we can put code for BBs as inside loop-body for loopRep
1840// check whether those BBs are determined as cont-BB for parentLoopRep
1841// earlier.
1842// If so, generate a new BB newBlk
1843//    (1) set newBlk common successor of BBs in exitBlkSet
1844//    (2) change the continue-instr in BBs in exitBlkSet to break-instr
1845//    (3) generate continue-instr in newBlk
1846//
1847template<class PassT>
1848typename CFGStructurizer<PassT>::BlockT *
1849CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
1850                                              LoopT *loopRep,
1851                                              std::set<BlockT *> &exitBlkSet,
1852                                              BlockT *exitLandBlk) {
1853  std::set<BlockT *> endBlkSet;
1854
1855//  BlockT *parentLoopHead = parentLoopRep->getHeader();
1856
1857
1858  for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
1859       iterEnd = exitBlkSet.end();
1860       iter != iterEnd; ++iter) {
1861    BlockT *exitBlk = *iter;
1862    BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
1863
1864    if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
1865      return NULL;
1866
1867    endBlkSet.insert(endBlk);
1868  }
1869
1870  BlockT *newBlk = funcRep->CreateMachineBasicBlock();
1871  funcRep->push_back(newBlk);  //insert to function
1872  CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
1873  SHOWNEWBLK(newBlk, "New continue block: ");
1874
1875  for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
1876       iterEnd = endBlkSet.end();
1877       iter != iterEnd; ++iter) {
1878      BlockT *endBlk = *iter;
1879      InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
1880      if (contInstr) {
1881        contInstr->eraseFromParent();
1882      }
1883      endBlk->addSuccessor(newBlk);
1884      if (DEBUGME) {
1885        errs() << "Add new continue Block to BB"
1886               << endBlk->getNumber() << " successors\n";
1887      }
1888  }
1889
1890  return newBlk;
1891} //relocateLoopcontBlock
1892
1893
1894// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1895// LoopLandBlock. This BB branch on the loop endBranchInit register to the
1896// pathes corresponding to the loop exiting branches.
1897
1898template<class PassT>
1899typename CFGStructurizer<PassT>::BlockT *
1900CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
1901                                              BlockTSmallerVector &exitingBlks,
1902                                              BlockTSmallerVector &exitBlks) {
1903  const AMDGPUInstrInfo *tii =
1904             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
1905  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1906
1907  RegiT endBranchReg = static_cast<int>
1908    (funcRep->getRegInfo().createVirtualRegister(I32RC));
1909  assert(endBranchReg >= 0);
1910
1911  // reg = 0 before entering the loop
1912  addLoopEndbranchInitReg(loopRep, endBranchReg);
1913
1914  uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
1915  assert(numBlks >=2 && numBlks == exitBlks.size());
1916
1917  BlockT *preExitingBlk = exitingBlks[0];
1918  BlockT *preExitBlk = exitBlks[0];
1919  BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
1920  funcRep->push_back(preBranchBlk);  //insert to function
1921  SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
1922
1923  BlockT *newLandBlk = preBranchBlk;
1924
1925      CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
1926        newLandBlk);
1927  preExitingBlk->removeSuccessor(preExitBlk);
1928  preExitingBlk->addSuccessor(newLandBlk);
1929
1930  //it is redundant to add reg = 0 to exitingBlks[0]
1931
1932  // For 1..n th exiting path (the last iteration handles two pathes) create the
1933  // branch to the previous path and the current path.
1934  for (uint32_t i = 1; i < numBlks; ++i) {
1935    BlockT *curExitingBlk = exitingBlks[i];
1936    BlockT *curExitBlk = exitBlks[i];
1937    BlockT *curBranchBlk;
1938
1939    if (i == numBlks - 1) {
1940      curBranchBlk = curExitBlk;
1941    } else {
1942      curBranchBlk = funcRep->CreateMachineBasicBlock();
1943      funcRep->push_back(curBranchBlk);  //insert to function
1944      SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
1945    }
1946
1947    // Add reg = i to exitingBlks[i].
1948    CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
1949                                       endBranchReg, i);
1950
1951    // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1952    // (exitingBlks[i], newLandBlk).
1953    CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
1954                                          newLandBlk);
1955    curExitingBlk->removeSuccessor(curExitBlk);
1956    curExitingBlk->addSuccessor(newLandBlk);
1957
1958    // add to preBranchBlk the branch instruction:
1959    // if (endBranchReg == preVal)
1960    //    preExitBlk
1961    // else
1962    //    curBranchBlk
1963    //
1964    // preValReg = i - 1
1965
1966  DebugLoc DL;
1967  RegiT preValReg = static_cast<int>
1968    (funcRep->getRegInfo().createVirtualRegister(I32RC));
1969
1970  preBranchBlk->insert(preBranchBlk->begin(),
1971                       tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
1972                       i - 1));
1973
1974  // condResReg = (endBranchReg == preValReg)
1975    RegiT condResReg = static_cast<int>
1976      (funcRep->getRegInfo().createVirtualRegister(I32RC));
1977    BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
1978      .addReg(endBranchReg).addReg(preValReg);
1979
1980    BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
1981      .addMBB(preExitBlk).addReg(condResReg);
1982
1983    preBranchBlk->addSuccessor(preExitBlk);
1984    preBranchBlk->addSuccessor(curBranchBlk);
1985
1986    // Update preExitingBlk, preExitBlk, preBranchBlk.
1987    preExitingBlk = curExitingBlk;
1988    preExitBlk = curExitBlk;
1989    preBranchBlk = curBranchBlk;
1990
1991  }  //end for 1 .. n blocks
1992
1993  return newLandBlk;
1994} //addLoopEndbranchBlock
1995
1996template<class PassT>
1997typename CFGStructurizer<PassT>::PathToKind
1998CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1999                                     bool allowSideEntry) {
2000  assert(dstBlk);
2001
2002  if (srcBlk == dstBlk) {
2003    return SinglePath_InPath;
2004  }
2005
2006  while (srcBlk && srcBlk->succ_size() == 1) {
2007    srcBlk = *srcBlk->succ_begin();
2008    if (srcBlk == dstBlk) {
2009      return SinglePath_InPath;
2010    }
2011
2012    if (!allowSideEntry && srcBlk->pred_size() > 1) {
2013      return Not_SinglePath;
2014    }
2015  }
2016
2017  if (srcBlk && srcBlk->succ_size()==0) {
2018    return SinglePath_NotInPath;
2019  }
2020
2021  return Not_SinglePath;
2022} //singlePathTo
2023
2024// If there is a single path from srcBlk to dstBlk, return the last block before
2025// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
2026// last block in the path Otherwise, return NULL
2027template<class PassT>
2028typename CFGStructurizer<PassT>::BlockT *
2029CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
2030                                      bool allowSideEntry) {
2031  assert(dstBlk);
2032
2033  if (srcBlk == dstBlk) {
2034    return srcBlk;
2035  }
2036
2037  if (srcBlk->succ_size() == 0) {
2038    return srcBlk;
2039  }
2040
2041  while (srcBlk && srcBlk->succ_size() == 1) {
2042    BlockT *preBlk = srcBlk;
2043
2044    srcBlk = *srcBlk->succ_begin();
2045    if (srcBlk == NULL) {
2046      return preBlk;
2047    }
2048
2049    if (!allowSideEntry && srcBlk->pred_size() > 1) {
2050      return NULL;
2051    }
2052  }
2053
2054  if (srcBlk && srcBlk->succ_size()==0) {
2055    return srcBlk;
2056  }
2057
2058  return NULL;
2059
2060} //singlePathEnd
2061
2062template<class PassT>
2063int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
2064                                               BlockT *dstBlk) {
2065  int cloned = 0;
2066  assert(preBlk->isSuccessor(srcBlk));
2067  while (srcBlk && srcBlk != dstBlk) {
2068    assert(srcBlk->succ_size() == 1);
2069    if (srcBlk->pred_size() > 1) {
2070      srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
2071      ++cloned;
2072    }
2073
2074    preBlk = srcBlk;
2075    srcBlk = *srcBlk->succ_begin();
2076  }
2077
2078  return cloned;
2079} //cloneOnSideEntryTo
2080
2081template<class PassT>
2082typename CFGStructurizer<PassT>::BlockT *
2083CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
2084                                                 BlockT *predBlk) {
2085  assert(predBlk->isSuccessor(curBlk) &&
2086         "succBlk is not a prececessor of curBlk");
2087
2088  BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
2089  CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
2090  //srcBlk, oldBlk, newBlk
2091
2092  predBlk->removeSuccessor(curBlk);
2093  predBlk->addSuccessor(cloneBlk);
2094
2095  // add all successor to cloneBlk
2096  CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
2097
2098  numClonedInstr += curBlk->size();
2099
2100  if (DEBUGME) {
2101    errs() << "Cloned block: " << "BB"
2102           << curBlk->getNumber() << "size " << curBlk->size() << "\n";
2103  }
2104
2105  SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
2106
2107  return cloneBlk;
2108} //cloneBlockForPredecessor
2109
2110template<class PassT>
2111typename CFGStructurizer<PassT>::BlockT *
2112CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
2113                                               BlockT *exitingBlk) {
2114  BlockT *exitBlk = NULL;
2115
2116  for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
2117       iterSuccEnd = exitingBlk->succ_end();
2118       iterSucc != iterSuccEnd; ++iterSucc) {
2119    BlockT *curBlk = *iterSucc;
2120    if (!loopRep->contains(curBlk)) {
2121      assert(exitBlk == NULL);
2122      exitBlk = curBlk;
2123    }
2124  }
2125
2126  assert(exitBlk != NULL);
2127
2128  return exitBlk;
2129} //exitingBlock2ExitBlock
2130
2131template<class PassT>
2132void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
2133                                                BlockT *dstBlk,
2134                                                InstrIterator insertPos) {
2135  InstrIterator spliceEnd;
2136  //look for the input branchinstr, not the AMDGPU branchinstr
2137  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2138  if (branchInstr == NULL) {
2139    if (DEBUGME) {
2140      errs() << "migrateInstruction don't see branch instr\n" ;
2141    }
2142    spliceEnd = srcBlk->end();
2143  } else {
2144    if (DEBUGME) {
2145      errs() << "migrateInstruction see branch instr\n" ;
2146      branchInstr->dump();
2147    }
2148    spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
2149  }
2150  if (DEBUGME) {
2151    errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
2152      << "srcSize = " << srcBlk->size() << "\n";
2153  }
2154
2155  //splice insert before insertPos
2156  dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
2157
2158  if (DEBUGME) {
2159    errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
2160      << "srcSize = " << srcBlk->size() << "\n";
2161  }
2162} //migrateInstruction
2163
2164// normalizeInfiniteLoopExit change
2165//   B1:
2166//        uncond_br LoopHeader
2167//
2168// to
2169//   B1:
2170//        cond_br 1 LoopHeader dummyExit
2171// and return the newly added dummy exit block
2172//
2173template<class PassT>
2174typename CFGStructurizer<PassT>::BlockT *
2175CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
2176  BlockT *loopHeader;
2177  BlockT *loopLatch;
2178  loopHeader = LoopRep->getHeader();
2179  loopLatch = LoopRep->getLoopLatch();
2180  BlockT *dummyExitBlk = NULL;
2181  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
2182  if (loopHeader!=NULL && loopLatch!=NULL) {
2183    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
2184    if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
2185      dummyExitBlk = funcRep->CreateMachineBasicBlock();
2186      funcRep->push_back(dummyExitBlk);  //insert to function
2187      SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
2188
2189      if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
2190
2191      typename BlockT::iterator insertPos =
2192        CFGTraits::getInstrPos(loopLatch, branchInstr);
2193      unsigned immReg =
2194        funcRep->getRegInfo().createVirtualRegister(I32RC);
2195      CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
2196      InstrT *newInstr =
2197        CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
2198      MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
2199
2200      SHOWNEWINSTR(newInstr);
2201
2202      branchInstr->eraseFromParent();
2203      loopLatch->addSuccessor(dummyExitBlk);
2204    }
2205  }
2206
2207  return dummyExitBlk;
2208} //normalizeInfiniteLoopExit
2209
2210template<class PassT>
2211void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2212  InstrT *branchInstr;
2213
2214  // I saw two unconditional branch in one basic block in example
2215  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2216  while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2217          && CFGTraits::isUncondBranch(branchInstr)) {
2218    if (DEBUGME) {
2219          errs() << "Removing unconditional branch instruction" ;
2220      branchInstr->dump();
2221    }
2222    branchInstr->eraseFromParent();
2223  }
2224} //removeUnconditionalBranch
2225
2226template<class PassT>
2227void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2228  if (srcBlk->succ_size() == 2) {
2229    BlockT *blk1 = *srcBlk->succ_begin();
2230    BlockT *blk2 = *(++srcBlk->succ_begin());
2231
2232    if (blk1 == blk2) {
2233      InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2234      assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2235      if (DEBUGME) {
2236        errs() << "Removing unneeded conditional branch instruction" ;
2237        branchInstr->dump();
2238      }
2239      branchInstr->eraseFromParent();
2240      SHOWNEWBLK(blk1, "Removing redundant successor");
2241      srcBlk->removeSuccessor(blk1);
2242    }
2243  }
2244} //removeRedundantConditionalBranch
2245
2246template<class PassT>
2247void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
2248                                               DEFAULT_VEC_SLOTS> &retBlks) {
2249  BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
2250  funcRep->push_back(dummyExitBlk);  //insert to function
2251  CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
2252
2253  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
2254         retBlks.begin(),
2255       iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
2256    BlockT *curBlk = *iter;
2257    InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
2258    if (curInstr) {
2259      curInstr->eraseFromParent();
2260    }
2261#if 0
2262    if (curBlk->size()==0 && curBlk->pred_size() == 1) {
2263      if (DEBUGME) {
2264        errs() << "Replace empty block BB" <<  curBlk->getNumber()
2265          << " with dummyExitBlock\n";
2266      }
2267      BlockT *predb = *curBlk->pred_begin();
2268      predb->removeSuccessor(curBlk);
2269      curBlk = predb;
2270    } //handle empty curBlk
2271#endif
2272    curBlk->addSuccessor(dummyExitBlk);
2273    if (DEBUGME) {
2274      errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
2275             << " successors\n";
2276    }
2277  } //for
2278
2279  SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
2280} //addDummyExitBlock
2281
2282template<class PassT>
2283void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2284  while (srcBlk->succ_size()) {
2285    srcBlk->removeSuccessor(*srcBlk->succ_begin());
2286  }
2287}
2288
2289template<class PassT>
2290void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2291  BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2292
2293  if (srcBlkInfo == NULL) {
2294    srcBlkInfo = new BlockInfo();
2295  }
2296
2297  srcBlkInfo->sccNum = sccNum;
2298}
2299
2300template<class PassT>
2301int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2302  BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2303  return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2304}
2305
2306template<class PassT>
2307void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2308  if (DEBUGME) {
2309        errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2310  }
2311
2312  BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2313
2314  if (srcBlkInfo == NULL) {
2315    srcBlkInfo = new BlockInfo();
2316  }
2317
2318  srcBlkInfo->isRetired = true;
2319  //int i = srcBlk->succ_size();
2320  //int j = srcBlk->pred_size();
2321  assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2322         && "can't retire block yet");
2323}
2324
2325template<class PassT>
2326bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2327  BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2328  return (srcBlkInfo && srcBlkInfo->isRetired);
2329}
2330
2331template<class PassT>
2332bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2333  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2334  while (loopRep && loopRep->getHeader() == curBlk) {
2335    LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2336
2337    if(loopLand == NULL)
2338      return true;
2339
2340    BlockT *landBlk = loopLand->landBlk;
2341    assert(landBlk);
2342    if (!isRetiredBlock(landBlk)) {
2343      return true;
2344    }
2345
2346    loopRep = loopRep->getParentLoop();
2347  }
2348
2349  return false;
2350} //isActiveLoophead
2351
2352template<class PassT>
2353bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2354  const unsigned blockSizeThreshold = 30;
2355  const unsigned cloneInstrThreshold = 100;
2356
2357  bool multiplePreds = blk && (blk->pred_size() > 1);
2358
2359  if(!multiplePreds)
2360    return false;
2361
2362  unsigned blkSize = blk->size();
2363  return ((blkSize > blockSizeThreshold)
2364          && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2365} //needMigrateBlock
2366
2367template<class PassT>
2368typename CFGStructurizer<PassT>::BlockT *
2369CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
2370                                            BlockTSmallerVector &exitBlks,
2371                                            std::set<BlockT *> &exitBlkSet) {
2372  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks;  //in exit path blocks
2373
2374  for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
2375       predIterEnd = landBlk->pred_end();
2376       predIter != predIterEnd; ++predIter) {
2377    BlockT *curBlk = *predIter;
2378    if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
2379      inpathBlks.push_back(curBlk);
2380    }
2381  } //for
2382
2383  //if landBlk has predecessors that are not in the given loop,
2384  //create a new block
2385  BlockT *newLandBlk = landBlk;
2386  if (inpathBlks.size() != landBlk->pred_size()) {
2387    newLandBlk = funcRep->CreateMachineBasicBlock();
2388    funcRep->push_back(newLandBlk);  //insert to function
2389    newLandBlk->addSuccessor(landBlk);
2390    for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
2391         inpathBlks.begin(),
2392         iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
2393      BlockT *curBlk = *iter;
2394      CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
2395      //srcBlk, oldBlk, newBlk
2396      curBlk->removeSuccessor(landBlk);
2397      curBlk->addSuccessor(newLandBlk);
2398    }
2399    for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
2400      if (exitBlks[i] == landBlk) {
2401        exitBlks[i] = newLandBlk;
2402      }
2403    }
2404    SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
2405  }
2406
2407  setLoopLandBlock(loopRep, newLandBlk);
2408
2409  return newLandBlk;
2410} // recordLoopbreakLand
2411
2412template<class PassT>
2413void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2414  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2415
2416  if (theEntry == NULL) {
2417    theEntry = new LoopLandInfo();
2418  }
2419  assert(theEntry->landBlk == NULL);
2420
2421  if (blk == NULL) {
2422    blk = funcRep->CreateMachineBasicBlock();
2423    funcRep->push_back(blk);  //insert to function
2424    SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2425  }
2426
2427  theEntry->landBlk = blk;
2428
2429  if (DEBUGME) {
2430    errs() << "setLoopLandBlock loop-header = BB"
2431           << loopRep->getHeader()->getNumber()
2432           << "  landing-block = BB" << blk->getNumber() << "\n";
2433  }
2434} // setLoopLandBlock
2435
2436template<class PassT>
2437void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2438  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2439
2440  if (theEntry == NULL) {
2441    theEntry = new LoopLandInfo();
2442  }
2443
2444  theEntry->breakOnRegs.insert(regNum);
2445
2446  if (DEBUGME) {
2447    errs() << "addLoopBreakOnReg loop-header = BB"
2448           << loopRep->getHeader()->getNumber()
2449           << "  regNum = " << regNum << "\n";
2450  }
2451} // addLoopBreakOnReg
2452
2453template<class PassT>
2454void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2455  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2456
2457  if (theEntry == NULL) {
2458    theEntry = new LoopLandInfo();
2459  }
2460  theEntry->contOnRegs.insert(regNum);
2461
2462  if (DEBUGME) {
2463    errs() << "addLoopContOnReg loop-header = BB"
2464           << loopRep->getHeader()->getNumber()
2465           << "  regNum = " << regNum << "\n";
2466  }
2467} // addLoopContOnReg
2468
2469template<class PassT>
2470void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2471  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2472
2473  if (theEntry == NULL) {
2474    theEntry = new LoopLandInfo();
2475  }
2476  theEntry->breakInitRegs.insert(regNum);
2477
2478  if (DEBUGME) {
2479    errs() << "addLoopBreakInitReg loop-header = BB"
2480           << loopRep->getHeader()->getNumber()
2481           << "  regNum = " << regNum << "\n";
2482  }
2483} // addLoopBreakInitReg
2484
2485template<class PassT>
2486void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2487  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2488
2489  if (theEntry == NULL) {
2490    theEntry = new LoopLandInfo();
2491  }
2492  theEntry->contInitRegs.insert(regNum);
2493
2494  if (DEBUGME) {
2495    errs() << "addLoopContInitReg loop-header = BB"
2496           << loopRep->getHeader()->getNumber()
2497           << "  regNum = " << regNum << "\n";
2498  }
2499} // addLoopContInitReg
2500
2501template<class PassT>
2502void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2503                                                     RegiT regNum) {
2504  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2505
2506  if (theEntry == NULL) {
2507    theEntry = new LoopLandInfo();
2508  }
2509  theEntry->endbranchInitRegs.insert(regNum);
2510
2511  if (DEBUGME)
2512  {
2513        errs() << "addLoopEndbranchInitReg loop-header = BB"
2514      << loopRep->getHeader()->getNumber()
2515      << "  regNum = " << regNum << "\n";
2516  }
2517} // addLoopEndbranchInitReg
2518
2519template<class PassT>
2520typename CFGStructurizer<PassT>::LoopLandInfo *
2521CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2522  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2523
2524  return theEntry;
2525} // getLoopLandInfo
2526
2527template<class PassT>
2528typename CFGStructurizer<PassT>::BlockT *
2529CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2530  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2531
2532  return theEntry ? theEntry->landBlk : NULL;
2533} // getLoopLandBlock
2534
2535
2536template<class PassT>
2537bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2538  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2539  if (loopRep == NULL)
2540    return false;
2541
2542  BlockT *loopHeader = loopRep->getHeader();
2543
2544  return curBlk->isSuccessor(loopHeader);
2545
2546} //hasBackEdge
2547
2548template<class PassT>
2549unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2550  return loopRep ? loopRep->getLoopDepth() : 0;
2551} //getLoopDepth
2552
2553template<class PassT>
2554int CFGStructurizer<PassT>::countActiveBlock
2555(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
2556 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
2557  int count = 0;
2558  while (iterStart != iterEnd) {
2559    if (!isRetiredBlock(*iterStart)) {
2560      ++count;
2561    }
2562    ++iterStart;
2563  }
2564
2565  return count;
2566} //countActiveBlock
2567
2568// This is work around solution for findNearestCommonDominator not avaiable to
2569// post dom a proper fix should go to Dominators.h.
2570
2571template<class PassT>
2572typename CFGStructurizer<PassT>::BlockT*
2573CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
2574
2575  if (postDomTree->dominates(blk1, blk2)) {
2576    return blk1;
2577  }
2578  if (postDomTree->dominates(blk2, blk1)) {
2579    return blk2;
2580  }
2581
2582  DomTreeNodeT *node1 = postDomTree->getNode(blk1);
2583  DomTreeNodeT *node2 = postDomTree->getNode(blk2);
2584
2585  // Handle newly cloned node.
2586  if (node1 == NULL && blk1->succ_size() == 1) {
2587    return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
2588  }
2589  if (node2 == NULL && blk2->succ_size() == 1) {
2590    return findNearestCommonPostDom(blk1, *blk2->succ_begin());
2591  }
2592
2593  if (node1 == NULL || node2 == NULL) {
2594    return NULL;
2595  }
2596
2597  node1 = node1->getIDom();
2598  while (node1) {
2599    if (postDomTree->dominates(node1, node2)) {
2600      return node1->getBlock();
2601    }
2602    node1 = node1->getIDom();
2603  }
2604
2605  return NULL;
2606}
2607
2608template<class PassT>
2609typename CFGStructurizer<PassT>::BlockT *
2610CFGStructurizer<PassT>::findNearestCommonPostDom
2611(typename std::set<BlockT *> &blks) {
2612  BlockT *commonDom;
2613  typename std::set<BlockT *>::const_iterator iter = blks.begin();
2614  typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2615  for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2616    BlockT *curBlk = *iter;
2617    if (curBlk != commonDom) {
2618      commonDom = findNearestCommonPostDom(curBlk, commonDom);
2619    }
2620  }
2621
2622  if (DEBUGME) {
2623    errs() << "Common post dominator for exit blocks is ";
2624    if (commonDom) {
2625          errs() << "BB" << commonDom->getNumber() << "\n";
2626    } else {
2627      errs() << "NULL\n";
2628    }
2629  }
2630
2631  return commonDom;
2632} //findNearestCommonPostDom
2633
2634} //end namespace llvm
2635
2636//todo: move-end
2637
2638
2639//===----------------------------------------------------------------------===//
2640//
2641// CFGStructurizer for AMDGPU
2642//
2643//===----------------------------------------------------------------------===//
2644
2645
2646using namespace llvmCFGStruct;
2647
2648namespace llvm
2649{
2650class AMDGPUCFGStructurizer : public MachineFunctionPass
2651{
2652public:
2653  typedef MachineInstr              InstructionType;
2654  typedef MachineFunction           FunctionType;
2655  typedef MachineBasicBlock         BlockType;
2656  typedef MachineLoopInfo           LoopinfoType;
2657  typedef MachineDominatorTree      DominatortreeType;
2658  typedef MachinePostDominatorTree  PostDominatortreeType;
2659  typedef MachineDomTreeNode        DomTreeNodeType;
2660  typedef MachineLoop               LoopType;
2661
2662protected:
2663  TargetMachine &TM;
2664  const TargetInstrInfo *TII;
2665  const AMDGPURegisterInfo *TRI;
2666
2667public:
2668  AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
2669  const TargetInstrInfo *getTargetInstrInfo() const;
2670  //bool runOnMachineFunction(MachineFunction &F);
2671
2672private:
2673
2674};   //end of class AMDGPUCFGStructurizer
2675
2676//char AMDGPUCFGStructurizer::ID = 0;
2677} //end of namespace llvm
2678AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm
2679                                          )
2680: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
2681  TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())
2682  ) {
2683}
2684
2685const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
2686  return TII;
2687}
2688//===----------------------------------------------------------------------===//
2689//
2690// CFGPrepare
2691//
2692//===----------------------------------------------------------------------===//
2693
2694
2695using namespace llvmCFGStruct;
2696
2697namespace llvm
2698{
2699class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer
2700{
2701public:
2702  static char ID;
2703
2704public:
2705  AMDGPUCFGPrepare(TargetMachine &tm);
2706
2707  virtual const char *getPassName() const;
2708  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2709
2710  bool runOnMachineFunction(MachineFunction &F);
2711
2712private:
2713
2714};   //end of class AMDGPUCFGPrepare
2715
2716char AMDGPUCFGPrepare::ID = 0;
2717} //end of namespace llvm
2718
2719AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
2720  : AMDGPUCFGStructurizer(ID, tm )
2721{
2722}
2723const char *AMDGPUCFGPrepare::getPassName() const {
2724  return "AMD IL Control Flow Graph Preparation Pass";
2725}
2726
2727void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2728  AU.addPreserved<MachineFunctionAnalysis>();
2729  AU.addRequired<MachineFunctionAnalysis>();
2730  AU.addRequired<MachineDominatorTree>();
2731  AU.addRequired<MachinePostDominatorTree>();
2732  AU.addRequired<MachineLoopInfo>();
2733}
2734
2735//===----------------------------------------------------------------------===//
2736//
2737// CFGPerform
2738//
2739//===----------------------------------------------------------------------===//
2740
2741
2742using namespace llvmCFGStruct;
2743
2744namespace llvm
2745{
2746class AMDGPUCFGPerform : public AMDGPUCFGStructurizer
2747{
2748public:
2749  static char ID;
2750
2751public:
2752  AMDGPUCFGPerform(TargetMachine &tm);
2753  virtual const char *getPassName() const;
2754  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2755  bool runOnMachineFunction(MachineFunction &F);
2756
2757private:
2758
2759};   //end of class AMDGPUCFGPerform
2760
2761char AMDGPUCFGPerform::ID = 0;
2762} //end of namespace llvm
2763
2764  AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
2765: AMDGPUCFGStructurizer(ID, tm)
2766{
2767}
2768
2769const char *AMDGPUCFGPerform::getPassName() const {
2770  return "AMD IL Control Flow Graph structurizer Pass";
2771}
2772
2773void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2774  AU.addPreserved<MachineFunctionAnalysis>();
2775  AU.addRequired<MachineFunctionAnalysis>();
2776  AU.addRequired<MachineDominatorTree>();
2777  AU.addRequired<MachinePostDominatorTree>();
2778  AU.addRequired<MachineLoopInfo>();
2779}
2780
2781//===----------------------------------------------------------------------===//
2782//
2783// CFGStructTraits<AMDGPUCFGStructurizer>
2784//
2785//===----------------------------------------------------------------------===//
2786
2787namespace llvmCFGStruct
2788{
2789// this class is tailor to the AMDGPU backend
2790template<>
2791struct CFGStructTraits<AMDGPUCFGStructurizer>
2792{
2793  typedef int RegiT;
2794
2795  static int getBreakNzeroOpcode(int oldOpcode) {
2796    switch(oldOpcode) {
2797      case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
2798    default:
2799      assert(0 && "internal error");
2800    };
2801    return -1;
2802  }
2803
2804  static int getBreakZeroOpcode(int oldOpcode) {
2805    switch(oldOpcode) {
2806      case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
2807    default:
2808      assert(0 && "internal error");
2809    };
2810    return -1;
2811  }
2812
2813  static int getBranchNzeroOpcode(int oldOpcode) {
2814    switch(oldOpcode) {
2815    case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
2816      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
2817      case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
2818    default:
2819      assert(0 && "internal error");
2820    };
2821    return -1;
2822  }
2823
2824  static int getBranchZeroOpcode(int oldOpcode) {
2825    switch(oldOpcode) {
2826    case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
2827      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
2828      case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
2829    default:
2830      assert(0 && "internal error");
2831    };
2832    return -1;
2833  }
2834
2835  static int getContinueNzeroOpcode(int oldOpcode)
2836  {
2837    switch(oldOpcode) {
2838      case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
2839      default:
2840        assert(0 && "internal error");
2841    };
2842    return -1;
2843  }
2844
2845  static int getContinueZeroOpcode(int oldOpcode) {
2846    switch(oldOpcode) {
2847      case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
2848    default:
2849      assert(0 && "internal error");
2850    };
2851    return -1;
2852  }
2853
2854// the explicitly represented branch target is the true branch target
2855#define getExplicitBranch getTrueBranch
2856#define setExplicitBranch setTrueBranch
2857
2858  static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2859    return instr->getOperand(0).getMBB();
2860  }
2861
2862  static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2863    instr->getOperand(0).setMBB(blk);
2864  }
2865
2866  static MachineBasicBlock *
2867  getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2868    assert(blk->succ_size() == 2);
2869    MachineBasicBlock *trueBranch = getTrueBranch(instr);
2870    MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2871    MachineBasicBlock::succ_iterator iterNext = iter;
2872    ++iterNext;
2873
2874    return (*iter == trueBranch) ? *iterNext : *iter;
2875  }
2876
2877  static bool isCondBranch(MachineInstr *instr) {
2878    switch (instr->getOpcode()) {
2879      case AMDGPU::JUMP:
2880        return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
2881      ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
2882      case AMDGPU::SI_IF_NZ:
2883      case AMDGPU::SI_IF_Z:
2884      break;
2885    default:
2886      return false;
2887    }
2888    return true;
2889  }
2890
2891  static bool isUncondBranch(MachineInstr *instr) {
2892    switch (instr->getOpcode()) {
2893    case AMDGPU::JUMP:
2894      return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
2895    default:
2896      return false;
2897    }
2898    return true;
2899  }
2900
2901  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
2902    //get DebugLoc from the first MachineBasicBlock instruction with debug info
2903    DebugLoc DL;
2904	for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
2905	  MachineInstr *instr = &(*iter);
2906	  if (instr->getDebugLoc().isUnknown() == false) {
2907	    DL = instr->getDebugLoc();
2908	  }
2909    }
2910    return DL;
2911  }
2912
2913  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2914    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2915    MachineInstr *instr = &*iter;
2916    if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2917      return instr;
2918    }
2919    return NULL;
2920  }
2921
2922  // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2923  //
2924  // BB with backward-edge could have move instructions after the branch
2925  // instruction.  Such move instruction "belong to" the loop backward-edge.
2926  //
2927  static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
2928    const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
2929                                  blk->getParent()->getTarget().getInstrInfo());
2930
2931    for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
2932         iterEnd = blk->rend(); iter != iterEnd; ++iter) {
2933      // FIXME: Simplify
2934      MachineInstr *instr = &*iter;
2935      if (instr) {
2936        if (isCondBranch(instr) || isUncondBranch(instr)) {
2937          return instr;
2938        } else if (!TII->isMov(instr->getOpcode())) {
2939          break;
2940        }
2941      }
2942    }
2943    return NULL;
2944  }
2945
2946  static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2947    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2948    if (iter != blk->rend()) {
2949      MachineInstr *instr = &(*iter);
2950      if (instr->getOpcode() == AMDGPU::RETURN) {
2951        return instr;
2952      }
2953    }
2954    return NULL;
2955  }
2956
2957  static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2958    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2959    if (iter != blk->rend()) {
2960      MachineInstr *instr = &(*iter);
2961      if (instr->getOpcode() == AMDGPU::CONTINUE) {
2962        return instr;
2963      }
2964    }
2965    return NULL;
2966  }
2967
2968  static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2969    for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2970      MachineInstr *instr = &(*iter);
2971      if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) {
2972        return instr;
2973      }
2974    }
2975    return NULL;
2976  }
2977
2978  static bool isReturnBlock(MachineBasicBlock *blk) {
2979    MachineInstr *instr = getReturnInstr(blk);
2980    bool isReturn = (blk->succ_size() == 0);
2981    if (instr) {
2982      assert(isReturn);
2983    } else if (isReturn) {
2984      if (DEBUGME) {
2985        errs() << "BB" << blk->getNumber()
2986               <<" is return block without RETURN instr\n";
2987      }
2988    }
2989
2990    return  isReturn;
2991  }
2992
2993  static MachineBasicBlock::iterator
2994  getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2995    assert(instr->getParent() == blk && "instruction doesn't belong to block");
2996    MachineBasicBlock::iterator iter = blk->begin();
2997    MachineBasicBlock::iterator iterEnd = blk->end();
2998    while (&(*iter) != instr && iter != iterEnd) {
2999      ++iter;
3000    }
3001
3002    assert(iter != iterEnd);
3003    return iter;
3004  }//getInstrPos
3005
3006  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
3007                                         AMDGPUCFGStructurizer *passRep) {
3008    return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
3009  } //insertInstrBefore
3010
3011  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
3012                                         AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
3013    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3014    MachineInstr *newInstr =
3015      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
3016
3017    MachineBasicBlock::iterator res;
3018    if (blk->begin() != blk->end()) {
3019      blk->insert(blk->begin(), newInstr);
3020    } else {
3021      blk->push_back(newInstr);
3022    }
3023
3024    SHOWNEWINSTR(newInstr);
3025
3026    return newInstr;
3027  } //insertInstrBefore
3028
3029  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
3030                             AMDGPUCFGStructurizer *passRep) {
3031    insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
3032  } //insertInstrEnd
3033
3034  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
3035                             AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
3036    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3037   MachineInstr *newInstr = blk->getParent()
3038      ->CreateMachineInstr(tii->get(newOpcode), DL);
3039
3040    blk->push_back(newInstr);
3041    //assume the instruction doesn't take any reg operand ...
3042
3043    SHOWNEWINSTR(newInstr);
3044  } //insertInstrEnd
3045
3046  static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
3047                                         int newOpcode,
3048                                         AMDGPUCFGStructurizer *passRep) {
3049    MachineInstr *oldInstr = &(*instrPos);
3050    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3051    MachineBasicBlock *blk = oldInstr->getParent();
3052    MachineInstr *newInstr =
3053      blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
3054                                           DebugLoc());
3055
3056    blk->insert(instrPos, newInstr);
3057    //assume the instruction doesn't take any reg operand ...
3058
3059    SHOWNEWINSTR(newInstr);
3060    return newInstr;
3061  } //insertInstrBefore
3062
3063  static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
3064                                     int newOpcode,
3065                                     AMDGPUCFGStructurizer *passRep,
3066									 DebugLoc DL) {
3067    MachineInstr *oldInstr = &(*instrPos);
3068    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3069    MachineBasicBlock *blk = oldInstr->getParent();
3070    MachineInstr *newInstr =
3071      blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
3072                                           DL);
3073
3074    blk->insert(instrPos, newInstr);
3075    MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
3076                                         false);
3077
3078    SHOWNEWINSTR(newInstr);
3079    //erase later oldInstr->eraseFromParent();
3080  } //insertCondBranchBefore
3081
3082  static void insertCondBranchBefore(MachineBasicBlock *blk,
3083                                     MachineBasicBlock::iterator insertPos,
3084                                     int newOpcode,
3085                                     AMDGPUCFGStructurizer *passRep,
3086                                     RegiT regNum,
3087									 DebugLoc DL) {
3088    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3089
3090    MachineInstr *newInstr =
3091      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
3092
3093    //insert before
3094    blk->insert(insertPos, newInstr);
3095    MachineInstrBuilder(newInstr).addReg(regNum, false);
3096
3097    SHOWNEWINSTR(newInstr);
3098  } //insertCondBranchBefore
3099
3100  static void insertCondBranchEnd(MachineBasicBlock *blk,
3101                                  int newOpcode,
3102                                  AMDGPUCFGStructurizer *passRep,
3103                                  RegiT regNum) {
3104    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3105    MachineInstr *newInstr =
3106      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
3107
3108    blk->push_back(newInstr);
3109    MachineInstrBuilder(newInstr).addReg(regNum, false);
3110
3111    SHOWNEWINSTR(newInstr);
3112  } //insertCondBranchEnd
3113
3114
3115  static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
3116                                      AMDGPUCFGStructurizer *passRep,
3117                                      RegiT regNum, int regVal) {
3118    MachineInstr *oldInstr = &(*instrPos);
3119    const AMDGPUInstrInfo *tii =
3120             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
3121    MachineBasicBlock *blk = oldInstr->getParent();
3122    MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3123                                                 regVal);
3124    blk->insert(instrPos, newInstr);
3125
3126    SHOWNEWINSTR(newInstr);
3127  } //insertAssignInstrBefore
3128
3129  static void insertAssignInstrBefore(MachineBasicBlock *blk,
3130                                      AMDGPUCFGStructurizer *passRep,
3131                                      RegiT regNum, int regVal) {
3132    const AMDGPUInstrInfo *tii =
3133             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
3134
3135    MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3136                                                 regVal);
3137    if (blk->begin() != blk->end()) {
3138      blk->insert(blk->begin(), newInstr);
3139    } else {
3140      blk->push_back(newInstr);
3141    }
3142
3143    SHOWNEWINSTR(newInstr);
3144
3145  } //insertInstrBefore
3146
3147  static void insertCompareInstrBefore(MachineBasicBlock *blk,
3148                                       MachineBasicBlock::iterator instrPos,
3149                                       AMDGPUCFGStructurizer *passRep,
3150                                       RegiT dstReg, RegiT src1Reg,
3151                                       RegiT src2Reg) {
3152    const AMDGPUInstrInfo *tii =
3153             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
3154    MachineInstr *newInstr =
3155      blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
3156
3157    MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
3158    MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
3159    MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
3160
3161    blk->insert(instrPos, newInstr);
3162    SHOWNEWINSTR(newInstr);
3163
3164  } //insertCompareInstrBefore
3165
3166  static void cloneSuccessorList(MachineBasicBlock *dstBlk,
3167                                 MachineBasicBlock *srcBlk) {
3168    for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
3169         iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
3170      dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of
3171    }
3172  } //cloneSuccessorList
3173
3174  static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
3175    MachineFunction *func = srcBlk->getParent();
3176    MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
3177    func->push_back(newBlk);  //insert to function
3178    //newBlk->setNumber(srcBlk->getNumber());
3179    for (MachineBasicBlock::iterator iter = srcBlk->begin(),
3180         iterEnd = srcBlk->end();
3181         iter != iterEnd; ++iter) {
3182      MachineInstr *instr = func->CloneMachineInstr(iter);
3183      newBlk->push_back(instr);
3184    }
3185    return newBlk;
3186  }
3187
3188  //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
3189  //the AMDGPU instruction is not recognized as terminator fix this and retire
3190  //this routine
3191  static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
3192                                         MachineBasicBlock *oldBlk,
3193                                         MachineBasicBlock *newBlk) {
3194    MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
3195    if (branchInstr && isCondBranch(branchInstr) &&
3196        getExplicitBranch(branchInstr) == oldBlk) {
3197      setExplicitBranch(branchInstr, newBlk);
3198    }
3199  }
3200
3201  static void wrapup(MachineBasicBlock *entryBlk) {
3202    assert((!entryBlk->getParent()->getJumpTableInfo()
3203            || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
3204           && "found a jump table");
3205
3206     //collect continue right before endloop
3207     SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
3208     MachineBasicBlock::iterator pre = entryBlk->begin();
3209     MachineBasicBlock::iterator iterEnd = entryBlk->end();
3210     MachineBasicBlock::iterator iter = pre;
3211     while (iter != iterEnd) {
3212       if (pre->getOpcode() == AMDGPU::CONTINUE
3213           && iter->getOpcode() == AMDGPU::ENDLOOP) {
3214         contInstr.push_back(pre);
3215       }
3216       pre = iter;
3217       ++iter;
3218     } //end while
3219
3220     //delete continue right before endloop
3221     for (unsigned i = 0; i < contInstr.size(); ++i) {
3222        contInstr[i]->eraseFromParent();
3223     }
3224
3225     // TODO to fix up jump table so later phase won't be confused.  if
3226     // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3227     // there isn't such an interface yet.  alternatively, replace all the other
3228     // blocks in the jump table with the entryBlk //}
3229
3230  } //wrapup
3231
3232  static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
3233    return &pass.getAnalysis<MachineDominatorTree>();
3234  }
3235
3236  static MachinePostDominatorTree*
3237  getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
3238    return &pass.getAnalysis<MachinePostDominatorTree>();
3239  }
3240
3241  static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
3242    return &pass.getAnalysis<MachineLoopInfo>();
3243  }
3244}; // template class CFGStructTraits
3245} //end of namespace llvm
3246
3247// createAMDGPUCFGPreparationPass- Returns a pass
3248FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
3249                                                 ) {
3250  return new AMDGPUCFGPrepare(tm );
3251}
3252
3253bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
3254  return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
3255                                                                        *this,
3256                                                                        TRI);
3257}
3258
3259// createAMDGPUCFGStructurizerPass- Returns a pass
3260FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
3261                                                  ) {
3262  return new AMDGPUCFGPerform(tm );
3263}
3264
3265bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
3266  return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
3267                                                                    *this,
3268                                                                    TRI);
3269}
3270
3271//end of file newline goes below
3272
3273