AMDILCFGStructurizer.cpp revision 3a0187b1b53eca3143286a5ae7917cd71117b902
1//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9
10#define DEBUGME 0
11#define DEBUG_TYPE "structcfg"
12
13#include "AMDIL.h"
14#include "AMDILInstrInfo.h"
15#include "AMDILUtilityFunctions.h"
16#include "llvm/ADT/SCCIterator.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/Analysis/DominatorInternals.h"
20#include "llvm/Analysis/Dominators.h"
21#include "llvm/CodeGen/MachineDominators.h"
22#include "llvm/CodeGen/MachineDominators.h"
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineFunctionAnalysis.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineJumpTableInfo.h"
29#include "llvm/CodeGen/MachineLoopInfo.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/Target/TargetInstrInfo.h"
32
33#define FirstNonDebugInstr(A) A->begin()
34using namespace llvm;
35
36// TODO: move-begin.
37
38//===----------------------------------------------------------------------===//
39//
40// Statistics for CFGStructurizer.
41//
42//===----------------------------------------------------------------------===//
43
44STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
45    "matched");
46STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
47    "matched");
48STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
49    "pattern matched");
50STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
51    "pattern matched");
52STATISTIC(numLoopPatternMatch,      "CFGStructurizer number of loop pattern "
53    "matched");
54STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
55STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
56
57//===----------------------------------------------------------------------===//
58//
59// Miscellaneous utility for CFGStructurizer.
60//
61//===----------------------------------------------------------------------===//
62namespace llvmCFGStruct
63{
64#define SHOWNEWINSTR(i) \
65  if (DEBUGME) errs() << "New instr: " << *i << "\n"
66
67#define SHOWNEWBLK(b, msg) \
68if (DEBUGME) { \
69  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
70  errs() << "\n"; \
71}
72
73#define SHOWBLK_DETAIL(b, msg) \
74if (DEBUGME) { \
75  if (b) { \
76  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
77  b->print(errs()); \
78  errs() << "\n"; \
79  } \
80}
81
82#define INVALIDSCCNUM -1
83#define INVALIDREGNUM 0
84
85template<class LoopinfoT>
86void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
87  for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
88       iterEnd = LoopInfo.end();
89       iter != iterEnd; ++iter) {
90    (*iter)->print(OS, 0);
91  }
92}
93
94template<class NodeT>
95void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
96  size_t sz = Src.size();
97  for (size_t i = 0; i < sz/2; ++i) {
98    NodeT *t = Src[i];
99    Src[i] = Src[sz - i - 1];
100    Src[sz - i - 1] = t;
101  }
102}
103
104} //end namespace llvmCFGStruct
105
106
107//===----------------------------------------------------------------------===//
108//
109// MachinePostDominatorTree
110//
111//===----------------------------------------------------------------------===//
112
113namespace llvm {
114
115/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
116/// to compute the a post-dominator tree.
117///
118struct MachinePostDominatorTree : public MachineFunctionPass {
119  static char ID; // Pass identification, replacement for typeid
120  DominatorTreeBase<MachineBasicBlock> *DT;
121  MachinePostDominatorTree() : MachineFunctionPass(ID)
122  {
123    DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
124    // postdominator
125  }
126
127  ~MachinePostDominatorTree();
128
129  virtual bool runOnMachineFunction(MachineFunction &MF);
130
131  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
132    AU.setPreservesAll();
133    MachineFunctionPass::getAnalysisUsage(AU);
134  }
135
136  inline const std::vector<MachineBasicBlock *> &getRoots() const {
137    return DT->getRoots();
138  }
139
140  inline MachineDomTreeNode *getRootNode() const {
141    return DT->getRootNode();
142  }
143
144  inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
145    return DT->getNode(BB);
146  }
147
148  inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
149    return DT->getNode(BB);
150  }
151
152  inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
153    return DT->dominates(A, B);
154  }
155
156  inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
157    return DT->dominates(A, B);
158  }
159
160  inline bool
161  properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
162    return DT->properlyDominates(A, B);
163  }
164
165  inline bool
166  properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
167    return DT->properlyDominates(A, B);
168  }
169
170  inline MachineBasicBlock *
171  findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
172    return DT->findNearestCommonDominator(A, B);
173  }
174
175  virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
176    DT->print(OS);
177  }
178};
179} //end of namespace llvm
180
181char MachinePostDominatorTree::ID = 0;
182static RegisterPass<MachinePostDominatorTree>
183machinePostDominatorTreePass("machinepostdomtree",
184                             "MachinePostDominator Tree Construction",
185                             true, true);
186
187//const PassInfo *const llvm::MachinePostDominatorsID
188//= &machinePostDominatorTreePass;
189
190bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
191  DT->recalculate(F);
192  //DEBUG(DT->dump());
193  return false;
194}
195
196MachinePostDominatorTree::~MachinePostDominatorTree() {
197  delete DT;
198}
199
200//===----------------------------------------------------------------------===//
201//
202// supporting data structure for CFGStructurizer
203//
204//===----------------------------------------------------------------------===//
205
206namespace llvmCFGStruct
207{
208template<class PassT>
209struct CFGStructTraits {
210};
211
212template <class InstrT>
213class BlockInformation {
214public:
215  bool isRetired;
216  int  sccNum;
217  //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
218  //Instructions defining the corresponding successor.
219  BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
220};
221
222template <class BlockT, class InstrT, class RegiT>
223class LandInformation {
224public:
225  BlockT *landBlk;
226  std::set<RegiT> breakInitRegs;  //Registers that need to "reg = 0", before
227                                  //WHILELOOP(thisloop) init before entering
228                                  //thisloop.
229  std::set<RegiT> contInitRegs;   //Registers that need to "reg = 0", after
230                                  //WHILELOOP(thisloop) init after entering
231                                  //thisloop.
232  std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
233                                     //land block, branch cond on this reg.
234  std::set<RegiT> breakOnRegs;       //registers that need to "if (reg) break
235                                     //endif" after ENDLOOP(thisloop) break
236                                     //outerLoopOf(thisLoop).
237  std::set<RegiT> contOnRegs;       //registers that need to "if (reg) continue
238                                    //endif" after ENDLOOP(thisloop) continue on
239                                    //outerLoopOf(thisLoop).
240  LandInformation() : landBlk(NULL) {}
241};
242
243} //end of namespace llvmCFGStruct
244
245//===----------------------------------------------------------------------===//
246//
247// CFGStructurizer
248//
249//===----------------------------------------------------------------------===//
250
251namespace llvmCFGStruct
252{
253// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
254template<class PassT>
255class  CFGStructurizer
256{
257public:
258  typedef enum {
259    Not_SinglePath = 0,
260    SinglePath_InPath = 1,
261    SinglePath_NotInPath = 2
262  } PathToKind;
263
264public:
265  typedef typename PassT::InstructionType         InstrT;
266  typedef typename PassT::FunctionType            FuncT;
267  typedef typename PassT::DominatortreeType       DomTreeT;
268  typedef typename PassT::PostDominatortreeType   PostDomTreeT;
269  typedef typename PassT::DomTreeNodeType         DomTreeNodeT;
270  typedef typename PassT::LoopinfoType            LoopInfoT;
271
272  typedef GraphTraits<FuncT *>                    FuncGTraits;
273  //typedef FuncGTraits::nodes_iterator BlockIterator;
274  typedef typename FuncT::iterator                BlockIterator;
275
276  typedef typename FuncGTraits::NodeType          BlockT;
277  typedef GraphTraits<BlockT *>                   BlockGTraits;
278  typedef GraphTraits<Inverse<BlockT *> >         InvBlockGTraits;
279  //typedef BlockGTraits::succ_iterator InstructionIterator;
280  typedef typename BlockT::iterator               InstrIterator;
281
282  typedef CFGStructTraits<PassT>                  CFGTraits;
283  typedef BlockInformation<InstrT>                BlockInfo;
284  typedef std::map<BlockT *, BlockInfo *>         BlockInfoMap;
285
286  typedef int                                     RegiT;
287  typedef typename PassT::LoopType                LoopT;
288  typedef LandInformation<BlockT, InstrT, RegiT>  LoopLandInfo;
289        typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
290        //landing info for loop break
291  typedef SmallVector<BlockT *, 32>               BlockTSmallerVector;
292
293public:
294  CFGStructurizer();
295  ~CFGStructurizer();
296
297  /// Perform the CFG structurization
298  bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
299
300  /// Perform the CFG preparation
301  bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
302
303private:
304  void   orderBlocks();
305  void   printOrderedBlocks(llvm::raw_ostream &OS);
306  int patternMatch(BlockT *CurBlock);
307  int patternMatchGroup(BlockT *CurBlock);
308
309  int serialPatternMatch(BlockT *CurBlock);
310  int ifPatternMatch(BlockT *CurBlock);
311  int switchPatternMatch(BlockT *CurBlock);
312  int loopendPatternMatch(BlockT *CurBlock);
313  int loopPatternMatch(BlockT *CurBlock);
314
315  int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
316  int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
317  //int loopWithoutBreak(BlockT *);
318
319  void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
320                        BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
321  void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
322                           BlockT *ContBlock, LoopT *contLoop);
323  bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
324  int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
325                       BlockT *FalseBlock);
326  int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
327                          BlockT *FalseBlock);
328  int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
329                              BlockT *FalseBlock, BlockT **LandBlockPtr);
330  void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
331                                   BlockT *FalseBlock, BlockT *LandBlock,
332                                   bool Detail = false);
333  PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
334                          bool AllowSideEntry = true);
335  BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
336                        bool AllowSideEntry = true);
337  int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
338  void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
339
340  void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
341                            BlockT *TrueBlock, BlockT *FalseBlock,
342                            BlockT *LandBlock);
343  void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
344  void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
345                           BlockT *ExitLandBlock, RegiT SetReg);
346  void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
347                           RegiT SetReg);
348  BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
349                                std::set<BlockT*> &ExitBlockSet,
350                                BlockT *ExitLandBlk);
351  BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
352                                BlockTSmallerVector &ExitingBlocks,
353                                BlockTSmallerVector &ExitBlocks);
354  BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
355  void removeUnconditionalBranch(BlockT *SrcBlock);
356  void removeRedundantConditionalBranch(BlockT *SrcBlock);
357  void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
358
359  void removeSuccessor(BlockT *SrcBlock);
360  BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
361  BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
362
363  void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
364                          InstrIterator InsertPos);
365
366  void recordSccnum(BlockT *SrcBlock, int SCCNum);
367  int getSCCNum(BlockT *srcBlk);
368
369  void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
370  bool isRetiredBlock(BlockT *SrcBlock);
371  bool isActiveLoophead(BlockT *CurBlock);
372  bool needMigrateBlock(BlockT *Block);
373
374  BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
375                              BlockTSmallerVector &exitBlocks,
376                              std::set<BlockT*> &ExitBlockSet);
377  void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
378  BlockT *getLoopLandBlock(LoopT *LoopRep);
379  LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
380
381  void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
382  void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
383  void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
384  void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
385  void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
386
387  bool hasBackEdge(BlockT *curBlock);
388  unsigned getLoopDepth  (LoopT *LoopRep);
389  int countActiveBlock(
390    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
391    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
392    BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
393  BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
394
395private:
396  DomTreeT *domTree;
397  PostDomTreeT *postDomTree;
398  LoopInfoT *loopInfo;
399  PassT *passRep;
400  FuncT *funcRep;
401
402  BlockInfoMap blockInfoMap;
403  LoopLandInfoMap loopLandInfoMap;
404  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
405  const AMDGPURegisterInfo *TRI;
406
407};  //template class CFGStructurizer
408
409template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
410  : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
411}
412
413template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
414  for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
415       E = blockInfoMap.end(); I != E; ++I) {
416    delete I->second;
417  }
418}
419
420template<class PassT>
421bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
422                                     const AMDGPURegisterInfo * tri) {
423  passRep = &pass;
424  funcRep = &func;
425  TRI = tri;
426
427  bool changed = false;
428  //func.RenumberBlocks();
429
430  //to do, if not reducible flow graph, make it so ???
431
432  if (DEBUGME) {
433        errs() << "AMDILCFGStructurizer::prepare\n";
434    //func.viewCFG();
435    //func.viewCFGOnly();
436    //func.dump();
437  }
438
439  //FIXME: gcc complains on this.
440  //domTree = &pass.getAnalysis<DomTreeT>();
441      //domTree = CFGTraits::getDominatorTree(pass);
442      //if (DEBUGME) {
443      //    domTree->print(errs());
444    //}
445
446  //FIXME: gcc complains on this.
447  //domTree = &pass.getAnalysis<DomTreeT>();
448      //postDomTree = CFGTraits::getPostDominatorTree(pass);
449      //if (DEBUGME) {
450      //   postDomTree->print(errs());
451    //}
452
453  //FIXME: gcc complains on this.
454  //loopInfo = &pass.getAnalysis<LoopInfoT>();
455  loopInfo = CFGTraits::getLoopInfo(pass);
456  if (DEBUGME) {
457    errs() << "LoopInfo:\n";
458    PrintLoopinfo(*loopInfo, errs());
459  }
460
461  orderBlocks();
462  if (DEBUGME) {
463    errs() << "Ordered blocks:\n";
464    printOrderedBlocks(errs());
465  }
466
467  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
468
469  for (typename LoopInfoT::iterator iter = loopInfo->begin(),
470       iterEnd = loopInfo->end();
471       iter != iterEnd; ++iter) {
472    LoopT* loopRep = (*iter);
473    BlockTSmallerVector exitingBlks;
474    loopRep->getExitingBlocks(exitingBlks);
475
476    if (exitingBlks.size() == 0) {
477      BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
478      if (dummyExitBlk != NULL)
479        retBlks.push_back(dummyExitBlk);
480    }
481  }
482
483  // Remove unconditional branch instr.
484  // Add dummy exit block iff there are multiple returns.
485
486  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
487       iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
488       iterBlk != iterEndBlk;
489       ++iterBlk) {
490    BlockT *curBlk = *iterBlk;
491    removeUnconditionalBranch(curBlk);
492    removeRedundantConditionalBranch(curBlk);
493    if (CFGTraits::isReturnBlock(curBlk)) {
494      retBlks.push_back(curBlk);
495    }
496    assert(curBlk->succ_size() <= 2);
497    //assert(curBlk->size() > 0);
498    //removeEmptyBlock(curBlk) ??
499  } //for
500
501  if (retBlks.size() >= 2) {
502    addDummyExitBlock(retBlks);
503    changed = true;
504  }
505
506  return changed;
507} //CFGStructurizer::prepare
508
509template<class PassT>
510bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
511    const AMDGPURegisterInfo * tri) {
512  passRep = &pass;
513  funcRep = &func;
514  TRI = tri;
515
516  //func.RenumberBlocks();
517
518  //Assume reducible CFG...
519  if (DEBUGME) {
520    errs() << "AMDILCFGStructurizer::run\n";
521    //errs() << func.getFunction()->getNameStr() << "\n";
522    func.viewCFG();
523    //func.viewCFGOnly();
524    //func.dump();
525  }
526
527#if 1
528  //FIXME: gcc complains on this.
529  //domTree = &pass.getAnalysis<DomTreeT>();
530  domTree = CFGTraits::getDominatorTree(pass);
531  if (DEBUGME) {
532    domTree->print(errs(), (const llvm::Module*)0);
533  }
534#endif
535
536  //FIXME: gcc complains on this.
537  //domTree = &pass.getAnalysis<DomTreeT>();
538  postDomTree = CFGTraits::getPostDominatorTree(pass);
539  if (DEBUGME) {
540    postDomTree->print(errs());
541  }
542
543  //FIXME: gcc complains on this.
544  //loopInfo = &pass.getAnalysis<LoopInfoT>();
545  loopInfo = CFGTraits::getLoopInfo(pass);
546  if (DEBUGME) {
547    errs() << "LoopInfo:\n";
548    PrintLoopinfo(*loopInfo, errs());
549  }
550
551  orderBlocks();
552//#define STRESSTEST
553#ifdef STRESSTEST
554  //Use the worse block ordering to test the algorithm.
555  ReverseVector(orderedBlks);
556#endif
557
558  if (DEBUGME) {
559    errs() << "Ordered blocks:\n";
560    printOrderedBlocks(errs());
561  }
562  int numIter = 0;
563  bool finish = false;
564  BlockT *curBlk;
565  bool makeProgress = false;
566  int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
567                                        orderedBlks.end());
568
569  do {
570    ++numIter;
571    if (DEBUGME) {
572      errs() << "numIter = " << numIter
573             << ", numRemaintedBlk = " << numRemainedBlk << "\n";
574    }
575
576    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
577      iterBlk = orderedBlks.begin();
578    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
579      iterBlkEnd = orderedBlks.end();
580
581    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
582      sccBeginIter = iterBlk;
583    BlockT *sccBeginBlk = NULL;
584    int sccNumBlk = 0;  // The number of active blocks, init to a
585                        // maximum possible number.
586    int sccNumIter;     // Number of iteration in this SCC.
587
588    while (iterBlk != iterBlkEnd) {
589      curBlk = *iterBlk;
590
591      if (sccBeginBlk == NULL) {
592        sccBeginIter = iterBlk;
593        sccBeginBlk = curBlk;
594        sccNumIter = 0;
595        sccNumBlk = numRemainedBlk; // Init to maximum possible number.
596        if (DEBUGME) {
597              errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
598              errs() << "\n";
599        }
600      }
601
602      if (!isRetiredBlock(curBlk)) {
603        patternMatch(curBlk);
604      }
605
606      ++iterBlk;
607
608      bool contNextScc = true;
609      if (iterBlk == iterBlkEnd
610          || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
611        // Just finish one scc.
612        ++sccNumIter;
613        int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
614        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
615          if (DEBUGME) {
616            errs() << "Can't reduce SCC " << getSCCNum(curBlk)
617                   << ", sccNumIter = " << sccNumIter;
618            errs() << "doesn't make any progress\n";
619          }
620          contNextScc = true;
621        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
622          sccNumBlk = sccRemainedNumBlk;
623          iterBlk = sccBeginIter;
624          contNextScc = false;
625          if (DEBUGME) {
626            errs() << "repeat processing SCC" << getSCCNum(curBlk)
627                   << "sccNumIter = " << sccNumIter << "\n";
628            func.viewCFG();
629            //func.viewCFGOnly();
630          }
631        } else {
632          // Finish the current scc.
633          contNextScc = true;
634        }
635      } else {
636        // Continue on next component in the current scc.
637        contNextScc = false;
638      }
639
640      if (contNextScc) {
641        sccBeginBlk = NULL;
642      }
643    } //while, "one iteration" over the function.
644
645    BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
646    if (entryBlk->succ_size() == 0) {
647      finish = true;
648      if (DEBUGME) {
649        errs() << "Reduce to one block\n";
650      }
651    } else {
652      int newnumRemainedBlk
653        = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
654      // consider cloned blocks ??
655      if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
656        makeProgress = true;
657        numRemainedBlk = newnumRemainedBlk;
658      } else {
659        makeProgress = false;
660        if (DEBUGME) {
661          errs() << "No progress\n";
662        }
663      }
664    }
665  } while (!finish && makeProgress);
666
667  // Misc wrap up to maintain the consistency of the Function representation.
668  CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
669
670  // Detach retired Block, release memory.
671  for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
672       iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
673    if ((*iterMap).second && (*iterMap).second->isRetired) {
674      assert(((*iterMap).first)->getNumber() != -1);
675      if (DEBUGME) {
676        errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
677      }
678      (*iterMap).first->eraseFromParent();  //Remove from the parent Function.
679    }
680    delete (*iterMap).second;
681  }
682  blockInfoMap.clear();
683
684  // clear loopLandInfoMap
685  for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
686       iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
687    delete (*iterMap).second;
688  }
689  loopLandInfoMap.clear();
690
691  if (DEBUGME) {
692    func.viewCFG();
693    //func.dump();
694  }
695
696  if (!finish) {
697    assert(!"IRREDUCIBL_CF");
698  }
699
700  return true;
701} //CFGStructurizer::run
702
703/// Print the ordered Blocks.
704///
705template<class PassT>
706void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
707  size_t i = 0;
708  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
709      iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
710       iterBlk != iterBlkEnd;
711       ++iterBlk, ++i) {
712    os << "BB" << (*iterBlk)->getNumber();
713    os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
714    if (i != 0 && i % 10 == 0) {
715      os << "\n";
716    } else {
717      os << " ";
718    }
719  }
720} //printOrderedBlocks
721
722/// Compute the reversed DFS post order of Blocks
723///
724template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
725  int sccNum = 0;
726  BlockT *bb;
727  for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
728       sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
729    std::vector<BlockT *> &sccNext = *sccIter;
730    for (typename std::vector<BlockT *>::const_iterator
731         blockIter = sccNext.begin(), blockEnd = sccNext.end();
732         blockIter != blockEnd; ++blockIter) {
733      bb = *blockIter;
734      orderedBlks.push_back(bb);
735      recordSccnum(bb, sccNum);
736    }
737  }
738
739  //walk through all the block in func to check for unreachable
740  for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
741       blockEnd1 = FuncGTraits::nodes_end(funcRep);
742       blockIter1 != blockEnd1; ++blockIter1) {
743    BlockT *bb = &(*blockIter1);
744    sccNum = getSCCNum(bb);
745    if (sccNum == INVALIDSCCNUM) {
746      errs() << "unreachable block BB" << bb->getNumber() << "\n";
747    }
748  } //end of for
749} //orderBlocks
750
751template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
752  int numMatch = 0;
753  int curMatch;
754
755  if (DEBUGME) {
756        errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
757  }
758
759  while ((curMatch = patternMatchGroup(curBlk)) > 0) {
760    numMatch += curMatch;
761  }
762
763  if (DEBUGME) {
764        errs() << "End patternMatch BB" << curBlk->getNumber()
765      << ", numMatch = " << numMatch << "\n";
766  }
767
768  return numMatch;
769} //patternMatch
770
771template<class PassT>
772int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
773  int numMatch = 0;
774  numMatch += serialPatternMatch(curBlk);
775  numMatch += ifPatternMatch(curBlk);
776  //numMatch += switchPatternMatch(curBlk);
777  numMatch += loopendPatternMatch(curBlk);
778  numMatch += loopPatternMatch(curBlk);
779  return numMatch;
780}//patternMatchGroup
781
782template<class PassT>
783int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
784  if (curBlk->succ_size() != 1) {
785    return 0;
786  }
787
788  BlockT *childBlk = *curBlk->succ_begin();
789  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
790    return 0;
791  }
792
793  mergeSerialBlock(curBlk, childBlk);
794  ++numSerialPatternMatch;
795  return 1;
796} //serialPatternMatch
797
798template<class PassT>
799int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
800  //two edges
801  if (curBlk->succ_size() != 2) {
802    return 0;
803  }
804
805  if (hasBackEdge(curBlk)) {
806    return 0;
807  }
808
809  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
810  if (branchInstr == NULL) {
811    return 0;
812  }
813
814  assert(CFGTraits::isCondBranch(branchInstr));
815
816  BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
817  BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
818  BlockT *landBlk;
819  int cloned = 0;
820
821  // TODO: Simplify
822  if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
823    && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
824    landBlk = *trueBlk->succ_begin();
825  } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
826    landBlk = NULL;
827  } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
828    landBlk = falseBlk;
829    falseBlk = NULL;
830  } else if (falseBlk->succ_size() == 1
831             && *falseBlk->succ_begin() == trueBlk) {
832    landBlk = trueBlk;
833    trueBlk = NULL;
834  } else if (falseBlk->succ_size() == 1
835             && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
836    landBlk = *falseBlk->succ_begin();
837  } else if (trueBlk->succ_size() == 1
838    && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
839    landBlk = *trueBlk->succ_begin();
840  } else {
841    return handleJumpintoIf(curBlk, trueBlk, falseBlk);
842  }
843
844  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
845  // new BB created for landBlk==NULL may introduce new challenge to the
846  // reduction process.
847  if (landBlk != NULL &&
848      ((trueBlk && trueBlk->pred_size() > 1)
849      || (falseBlk && falseBlk->pred_size() > 1))) {
850     cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
851  }
852
853  if (trueBlk && trueBlk->pred_size() > 1) {
854    trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
855    ++cloned;
856  }
857
858  if (falseBlk && falseBlk->pred_size() > 1) {
859    falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
860    ++cloned;
861  }
862
863  mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
864
865  ++numIfPatternMatch;
866
867  numClonedBlock += cloned;
868
869  return 1 + cloned;
870} //ifPatternMatch
871
872template<class PassT>
873int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
874  return 0;
875} //switchPatternMatch
876
877template<class PassT>
878int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
879  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
880  typename std::vector<LoopT *> nestedLoops;
881  while (loopRep) {
882    nestedLoops.push_back(loopRep);
883    loopRep = loopRep->getParentLoop();
884  }
885
886  if (nestedLoops.size() == 0) {
887    return 0;
888  }
889
890  // Process nested loop outside->inside, so "continue" to a outside loop won't
891  // be mistaken as "break" of the current loop.
892  int num = 0;
893  for (typename std::vector<LoopT *>::reverse_iterator
894       iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
895       iter != iterEnd; ++iter) {
896    loopRep = *iter;
897
898    if (getLoopLandBlock(loopRep) != NULL) {
899      continue;
900    }
901
902    BlockT *loopHeader = loopRep->getHeader();
903
904    int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
905
906    if (numBreak == -1) {
907      break;
908    }
909
910    int numCont = loopcontPatternMatch(loopRep, loopHeader);
911    num += numBreak + numCont;
912  }
913
914  return num;
915} //loopendPatternMatch
916
917template<class PassT>
918int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
919  if (curBlk->succ_size() != 0) {
920    return 0;
921  }
922
923  int numLoop = 0;
924  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
925  while (loopRep && loopRep->getHeader() == curBlk) {
926    LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
927    if (loopLand) {
928      BlockT *landBlk = loopLand->landBlk;
929      assert(landBlk);
930      if (!isRetiredBlock(landBlk)) {
931        mergeLooplandBlock(curBlk, loopLand);
932        ++numLoop;
933      }
934    }
935    loopRep = loopRep->getParentLoop();
936  }
937
938  numLoopPatternMatch += numLoop;
939
940  return numLoop;
941} //loopPatternMatch
942
943template<class PassT>
944int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
945                                                  BlockT *loopHeader) {
946  BlockTSmallerVector exitingBlks;
947  loopRep->getExitingBlocks(exitingBlks);
948
949  if (DEBUGME) {
950    errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
951  }
952
953  if (exitingBlks.size() == 0) {
954    setLoopLandBlock(loopRep);
955    return 0;
956  }
957
958  // Compute the corresponding exitBlks and exit block set.
959  BlockTSmallerVector exitBlks;
960  std::set<BlockT *> exitBlkSet;
961  for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
962       iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
963    BlockT *exitingBlk = *iter;
964    BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
965    exitBlks.push_back(exitBlk);
966    exitBlkSet.insert(exitBlk);  //non-duplicate insert
967  }
968
969  assert(exitBlkSet.size() > 0);
970  assert(exitBlks.size() == exitingBlks.size());
971
972  if (DEBUGME) {
973    errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
974  }
975
976  // Find exitLandBlk.
977  BlockT *exitLandBlk = NULL;
978  int numCloned = 0;
979  int numSerial = 0;
980
981  if (exitBlkSet.size() == 1)
982  {
983    exitLandBlk = *exitBlkSet.begin();
984  } else {
985    exitLandBlk = findNearestCommonPostDom(exitBlkSet);
986
987    if (exitLandBlk == NULL) {
988      return -1;
989    }
990
991    bool allInPath = true;
992    bool allNotInPath = true;
993    for (typename std::set<BlockT*>::const_iterator
994         iter = exitBlkSet.begin(),
995         iterEnd = exitBlkSet.end();
996         iter != iterEnd; ++iter) {
997      BlockT *exitBlk = *iter;
998
999      PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
1000      if (DEBUGME) {
1001        errs() << "BB" << exitBlk->getNumber()
1002               << " to BB" << exitLandBlk->getNumber() << " PathToKind="
1003               << pathKind << "\n";
1004      }
1005
1006      allInPath = allInPath && (pathKind == SinglePath_InPath);
1007      allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
1008
1009      if (!allInPath && !allNotInPath) {
1010        if (DEBUGME) {
1011              errs() << "singlePath check fail\n";
1012        }
1013        return -1;
1014      }
1015    } // check all exit blocks
1016
1017    if (allNotInPath) {
1018#if 1
1019
1020      // TODO: Simplify, maybe separate function?
1021      //funcRep->viewCFG();
1022      LoopT *parentLoopRep = loopRep->getParentLoop();
1023      BlockT *parentLoopHeader = NULL;
1024      if (parentLoopRep)
1025        parentLoopHeader = parentLoopRep->getHeader();
1026
1027      if (exitLandBlk == parentLoopHeader &&
1028          (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
1029                                               loopRep,
1030                                               exitBlkSet,
1031                                               exitLandBlk)) != NULL) {
1032        if (DEBUGME) {
1033          errs() << "relocateLoopcontBlock success\n";
1034        }
1035      } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
1036                                                      exitingBlks,
1037                                                      exitBlks)) != NULL) {
1038        if (DEBUGME) {
1039          errs() << "insertEndbranchBlock success\n";
1040        }
1041      } else {
1042        if (DEBUGME) {
1043          errs() << "loop exit fail\n";
1044        }
1045        return -1;
1046      }
1047#else
1048      return -1;
1049#endif
1050    }
1051
1052    // Handle side entry to exit path.
1053    exitBlks.clear();
1054    exitBlkSet.clear();
1055    for (typename BlockTSmallerVector::iterator iterExiting =
1056           exitingBlks.begin(),
1057         iterExitingEnd = exitingBlks.end();
1058         iterExiting != iterExitingEnd; ++iterExiting) {
1059      BlockT *exitingBlk = *iterExiting;
1060      BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
1061      BlockT *newExitBlk = exitBlk;
1062
1063      if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
1064        newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
1065        ++numCloned;
1066      }
1067
1068      numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
1069
1070      exitBlks.push_back(newExitBlk);
1071      exitBlkSet.insert(newExitBlk);
1072    }
1073
1074    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1075         iterExitEnd = exitBlks.end();
1076         iterExit != iterExitEnd; ++iterExit) {
1077      BlockT *exitBlk = *iterExit;
1078      numSerial += serialPatternMatch(exitBlk);
1079    }
1080
1081    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
1082         iterExitEnd = exitBlks.end();
1083         iterExit != iterExitEnd; ++iterExit) {
1084      BlockT *exitBlk = *iterExit;
1085      if (exitBlk->pred_size() > 1) {
1086        if (exitBlk != exitLandBlk) {
1087          return -1;
1088        }
1089      } else {
1090        if (exitBlk != exitLandBlk &&
1091            (exitBlk->succ_size() != 1 ||
1092            *exitBlk->succ_begin() != exitLandBlk)) {
1093          return -1;
1094        }
1095      }
1096    }
1097  } // else
1098
1099  // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
1100  exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
1101
1102  // Fold break into the breaking block. Leverage across level breaks.
1103  assert(exitingBlks.size() == exitBlks.size());
1104  for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
1105       iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
1106       iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
1107    BlockT *exitBlk = *iterExit;
1108    BlockT *exitingBlk = *iterExiting;
1109    assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
1110    LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
1111    handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
1112  }
1113
1114  int numBreak = static_cast<int>(exitingBlks.size());
1115  numLoopbreakPatternMatch += numBreak;
1116  numClonedBlock += numCloned;
1117  return numBreak + numSerial + numCloned;
1118} //loopbreakPatternMatch
1119
1120template<class PassT>
1121int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
1122                                                 BlockT *loopHeader) {
1123  int numCont = 0;
1124  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
1125  for (typename InvBlockGTraits::ChildIteratorType iter =
1126       InvBlockGTraits::child_begin(loopHeader),
1127       iterEnd = InvBlockGTraits::child_end(loopHeader);
1128       iter != iterEnd; ++iter) {
1129    BlockT *curBlk = *iter;
1130    if (loopRep->contains(curBlk)) {
1131      handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
1132                          loopHeader, loopRep);
1133      contBlk.push_back(curBlk);
1134      ++numCont;
1135    }
1136  }
1137
1138  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
1139       iter = contBlk.begin(), iterEnd = contBlk.end();
1140       iter != iterEnd; ++iter) {
1141    (*iter)->removeSuccessor(loopHeader);
1142  }
1143
1144  numLoopcontPatternMatch += numCont;
1145
1146  return numCont;
1147} //loopcontPatternMatch
1148
1149
1150template<class PassT>
1151bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
1152                                                         BlockT *src2Blk) {
1153  // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
1154  // same loop with LoopLandInfo without explicitly keeping track of
1155  // loopContBlks and loopBreakBlks, this is a method to get the information.
1156  //
1157  if (src1Blk->succ_size() == 0) {
1158    LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
1159    if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
1160      LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
1161      if (theEntry != NULL) {
1162        if (DEBUGME) {
1163          errs() << "isLoopContBreakBlock yes src1 = BB"
1164                 << src1Blk->getNumber()
1165                 << " src2 = BB" << src2Blk->getNumber() << "\n";
1166        }
1167        return true;
1168      }
1169    }
1170  }
1171  return false;
1172}  //isSameloopDetachedContbreak
1173
1174template<class PassT>
1175int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
1176                                             BlockT *trueBlk,
1177                                             BlockT *falseBlk) {
1178  int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
1179  if (num == 0) {
1180    if (DEBUGME) {
1181      errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1182    }
1183    num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
1184  }
1185  return num;
1186}
1187
1188template<class PassT>
1189int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
1190                                                BlockT *trueBlk,
1191                                                BlockT *falseBlk) {
1192  int num = 0;
1193  BlockT *downBlk;
1194
1195  //trueBlk could be the common post dominator
1196  downBlk = trueBlk;
1197
1198  if (DEBUGME) {
1199    errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
1200           << " true = BB" << trueBlk->getNumber()
1201           << ", numSucc=" << trueBlk->succ_size()
1202           << " false = BB" << falseBlk->getNumber() << "\n";
1203  }
1204
1205  while (downBlk) {
1206    if (DEBUGME) {
1207      errs() << "check down = BB" << downBlk->getNumber();
1208    }
1209
1210    if (//postDomTree->dominates(downBlk, falseBlk) &&
1211        singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
1212      if (DEBUGME) {
1213        errs() << " working\n";
1214      }
1215
1216      num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
1217      num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
1218
1219      numClonedBlock += num;
1220      num += serialPatternMatch(*headBlk->succ_begin());
1221      num += serialPatternMatch(*(++headBlk->succ_begin()));
1222      num += ifPatternMatch(headBlk);
1223      assert(num > 0); //
1224
1225      break;
1226    }
1227    if (DEBUGME) {
1228      errs() << " not working\n";
1229    }
1230    downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
1231  } // walk down the postDomTree
1232
1233  return num;
1234} //handleJumpintoIf
1235
1236template<class PassT>
1237void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
1238                                                         BlockT *trueBlk,
1239                                                         BlockT *falseBlk,
1240                                                         BlockT *landBlk,
1241                                                         bool detail) {
1242  errs() << "head = BB" << headBlk->getNumber()
1243         << " size = " << headBlk->size();
1244  if (detail) {
1245    errs() << "\n";
1246    headBlk->print(errs());
1247    errs() << "\n";
1248  }
1249
1250  if (trueBlk) {
1251    errs() << ", true = BB" << trueBlk->getNumber() << " size = "
1252           << trueBlk->size() << " numPred = " << trueBlk->pred_size();
1253    if (detail) {
1254      errs() << "\n";
1255      trueBlk->print(errs());
1256      errs() << "\n";
1257    }
1258  }
1259  if (falseBlk) {
1260    errs() << ", false = BB" << falseBlk->getNumber() << " size = "
1261           << falseBlk->size() << " numPred = " << falseBlk->pred_size();
1262    if (detail) {
1263      errs() << "\n";
1264      falseBlk->print(errs());
1265      errs() << "\n";
1266    }
1267  }
1268  if (landBlk) {
1269    errs() << ", land = BB" << landBlk->getNumber() << " size = "
1270           << landBlk->size() << " numPred = " << landBlk->pred_size();
1271    if (detail) {
1272      errs() << "\n";
1273      landBlk->print(errs());
1274      errs() << "\n";
1275    }
1276  }
1277
1278    errs() << "\n";
1279} //showImproveSimpleJumpintoIf
1280
1281template<class PassT>
1282int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
1283                                                    BlockT *trueBlk,
1284                                                    BlockT *falseBlk,
1285                                                    BlockT **plandBlk) {
1286  bool migrateTrue = false;
1287  bool migrateFalse = false;
1288
1289  BlockT *landBlk = *plandBlk;
1290
1291  assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
1292         && (falseBlk == NULL || falseBlk->succ_size() <= 1));
1293
1294  if (trueBlk == falseBlk) {
1295    return 0;
1296  }
1297
1298#if 0
1299  if (DEBUGME) {
1300    errs() << "improveSimpleJumpintoIf: ";
1301    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1302  }
1303#endif
1304
1305  // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
1306  // May consider the # landBlk->pred_size() as it represents the number of
1307  // assignment initReg = .. needed to insert.
1308  migrateTrue = needMigrateBlock(trueBlk);
1309  migrateFalse = needMigrateBlock(falseBlk);
1310
1311  if (!migrateTrue && !migrateFalse) {
1312    return 0;
1313  }
1314
1315  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1316  // have more than one predecessors.  without doing this, its predecessor
1317  // rather than headBlk will have undefined value in initReg.
1318  if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
1319    migrateTrue = true;
1320  }
1321  if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
1322    migrateFalse = true;
1323  }
1324
1325  if (DEBUGME) {
1326    errs() << "before improveSimpleJumpintoIf: ";
1327    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1328    //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1329  }
1330
1331  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1332  //
1333  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1334  //      {initReg = 0; org falseBlk branch }
1335  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1336  //      => org landBlk
1337  //      if landBlk->pred_size() > 2, put the about if-else inside
1338  //      if (initReg !=2) {...}
1339  //
1340  // add initReg = initVal to headBlk
1341
1342  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1343  unsigned initReg =
1344    funcRep->getRegInfo().createVirtualRegister(I32RC);
1345  if (!migrateTrue || !migrateFalse) {
1346    int initVal = migrateTrue ? 0 : 1;
1347    CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
1348  }
1349
1350  int numNewBlk = 0;
1351
1352  if (landBlk == NULL) {
1353    landBlk = funcRep->CreateMachineBasicBlock();
1354    funcRep->push_back(landBlk);  //insert to function
1355
1356    if (trueBlk) {
1357      trueBlk->addSuccessor(landBlk);
1358    } else {
1359      headBlk->addSuccessor(landBlk);
1360    }
1361
1362    if (falseBlk) {
1363      falseBlk->addSuccessor(landBlk);
1364    } else {
1365      headBlk->addSuccessor(landBlk);
1366    }
1367
1368    numNewBlk ++;
1369  }
1370
1371  bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
1372
1373  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1374  typename BlockT::iterator insertPos =
1375    CFGTraits::getInstrPos
1376    (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
1377
1378  if (landBlkHasOtherPred) {
1379    unsigned immReg =
1380      funcRep->getRegInfo().createVirtualRegister(I32RC);
1381    CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
1382    unsigned cmpResReg =
1383      funcRep->getRegInfo().createVirtualRegister(I32RC);
1384
1385    CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
1386                                        initReg, immReg);
1387    CFGTraits::insertCondBranchBefore(landBlk, insertPos,
1388                                      AMDGPU::IF_LOGICALZ_i32, passRep,
1389                                      cmpResReg, DebugLoc());
1390  }
1391
1392  CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
1393                                    passRep, initReg, DebugLoc());
1394
1395  if (migrateTrue) {
1396    migrateInstruction(trueBlk, landBlk, insertPos);
1397    // need to uncondionally insert the assignment to ensure a path from its
1398    // predecessor rather than headBlk has valid value in initReg if
1399    // (initVal != 1).
1400    CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
1401  }
1402  CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
1403
1404  if (migrateFalse) {
1405    migrateInstruction(falseBlk, landBlk, insertPos);
1406    // need to uncondionally insert the assignment to ensure a path from its
1407    // predecessor rather than headBlk has valid value in initReg if
1408    // (initVal != 0)
1409    CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
1410  }
1411  //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1412
1413  if (landBlkHasOtherPred) {
1414    // add endif
1415    CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
1416
1417    // put initReg = 2 to other predecessors of landBlk
1418    for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
1419         predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
1420         ++predIter) {
1421      BlockT *curBlk = *predIter;
1422      if (curBlk != trueBlk && curBlk != falseBlk) {
1423        CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
1424      }
1425    } //for
1426  }
1427  if (DEBUGME) {
1428    errs() << "result from improveSimpleJumpintoIf: ";
1429    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
1430    //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
1431  }
1432
1433  // update landBlk
1434  *plandBlk = landBlk;
1435
1436  return numNewBlk;
1437} //improveSimpleJumpintoIf
1438
1439template<class PassT>
1440void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
1441                                              LoopT *exitingLoop,
1442                                             BlockT *exitBlk,
1443                                              LoopT *exitLoop,
1444                                             BlockT *landBlk) {
1445  if (DEBUGME) {
1446    errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
1447           << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
1448  }
1449  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1450
1451  RegiT initReg = INVALIDREGNUM;
1452  if (exitingLoop != exitLoop) {
1453    initReg = static_cast<int>
1454      (funcRep->getRegInfo().createVirtualRegister(I32RC));
1455    assert(initReg != INVALIDREGNUM);
1456    addLoopBreakInitReg(exitLoop, initReg);
1457    while (exitingLoop != exitLoop && exitingLoop) {
1458      addLoopBreakOnReg(exitingLoop, initReg);
1459      exitingLoop = exitingLoop->getParentLoop();
1460    }
1461    assert(exitingLoop == exitLoop);
1462  }
1463
1464  mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
1465
1466} //handleLoopbreak
1467
1468template<class PassT>
1469void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
1470                                                  LoopT *contingLoop,
1471                                                 BlockT *contBlk,
1472                                                  LoopT *contLoop) {
1473  if (DEBUGME) {
1474    errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
1475           << " header = BB" << contBlk->getNumber() << "\n";
1476
1477    errs() << "Trying to continue loop-depth = "
1478           << getLoopDepth(contLoop)
1479           << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
1480  }
1481
1482  RegiT initReg = INVALIDREGNUM;
1483  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1484  if (contingLoop != contLoop) {
1485    initReg = static_cast<int>
1486      (funcRep->getRegInfo().createVirtualRegister(I32RC));
1487    assert(initReg != INVALIDREGNUM);
1488    addLoopContInitReg(contLoop, initReg);
1489    while (contingLoop && contingLoop->getParentLoop() != contLoop) {
1490      addLoopBreakOnReg(contingLoop, initReg);  //not addLoopContOnReg
1491      contingLoop = contingLoop->getParentLoop();
1492    }
1493    assert(contingLoop && contingLoop->getParentLoop() == contLoop);
1494    addLoopContOnReg(contingLoop, initReg);
1495  }
1496
1497  settleLoopcontBlock(contingBlk, contBlk, initReg);
1498  //contingBlk->removeSuccessor(loopHeader);
1499} //handleLoopcontBlock
1500
1501template<class PassT>
1502void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
1503  if (DEBUGME) {
1504    errs() << "serialPattern BB" << dstBlk->getNumber()
1505           << " <= BB" << srcBlk->getNumber() << "\n";
1506  }
1507  //removeUnconditionalBranch(dstBlk);
1508  dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
1509
1510  dstBlk->removeSuccessor(srcBlk);
1511  CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
1512
1513  removeSuccessor(srcBlk);
1514  retireBlock(dstBlk, srcBlk);
1515} //mergeSerialBlock
1516
1517template<class PassT>
1518void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
1519                                                  BlockT *curBlk,
1520                                                  BlockT *trueBlk,
1521                                                  BlockT *falseBlk,
1522                                                  BlockT *landBlk) {
1523  if (DEBUGME) {
1524    errs() << "ifPattern BB" << curBlk->getNumber();
1525    errs() << "{  ";
1526    if (trueBlk) {
1527      errs() << "BB" << trueBlk->getNumber();
1528    }
1529    errs() << "  } else ";
1530    errs() << "{  ";
1531    if (falseBlk) {
1532      errs() << "BB" << falseBlk->getNumber();
1533    }
1534    errs() << "  }\n ";
1535    errs() << "landBlock: ";
1536    if (landBlk == NULL) {
1537      errs() << "NULL";
1538    } else {
1539      errs() << "BB" << landBlk->getNumber();
1540    }
1541    errs() << "\n";
1542  }
1543
1544  int oldOpcode = branchInstr->getOpcode();
1545  DebugLoc branchDL = branchInstr->getDebugLoc();
1546
1547//    transform to
1548//    if cond
1549//       trueBlk
1550//    else
1551//       falseBlk
1552//    endif
1553//    landBlk
1554
1555  typename BlockT::iterator branchInstrPos =
1556    CFGTraits::getInstrPos(curBlk, branchInstr);
1557  CFGTraits::insertCondBranchBefore(branchInstrPos,
1558                                    CFGTraits::getBranchNzeroOpcode(oldOpcode),
1559                                    passRep,
1560									branchDL);
1561
1562  if (trueBlk) {
1563    curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
1564    curBlk->removeSuccessor(trueBlk);
1565    if (landBlk && trueBlk->succ_size()!=0) {
1566      trueBlk->removeSuccessor(landBlk);
1567    }
1568    retireBlock(curBlk, trueBlk);
1569  }
1570  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
1571
1572  if (falseBlk) {
1573    curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
1574                   falseBlk->end());
1575    curBlk->removeSuccessor(falseBlk);
1576    if (landBlk && falseBlk->succ_size() != 0) {
1577      falseBlk->removeSuccessor(landBlk);
1578    }
1579    retireBlock(curBlk, falseBlk);
1580  }
1581  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1582
1583  //curBlk->remove(branchInstrPos);
1584  branchInstr->eraseFromParent();
1585
1586  if (landBlk && trueBlk && falseBlk) {
1587    curBlk->addSuccessor(landBlk);
1588  }
1589
1590} //mergeIfthenelseBlock
1591
1592template<class PassT>
1593void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
1594                                                LoopLandInfo *loopLand) {
1595  BlockT *landBlk = loopLand->landBlk;
1596
1597  if (DEBUGME) {
1598    errs() << "loopPattern header = BB" << dstBlk->getNumber()
1599           << " land = BB" << landBlk->getNumber() << "\n";
1600  }
1601
1602  // Loop contInitRegs are init at the beginning of the loop.
1603  for (typename std::set<RegiT>::const_iterator iter =
1604         loopLand->contInitRegs.begin(),
1605       iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
1606    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1607  }
1608
1609  /* we last inserterd the DebugLoc in the
1610   * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
1611   * search for the DebugLoc in the that statement.
1612   * if not found, we have to insert the empty/default DebugLoc */
1613  InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
1614  DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
1615
1616  CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
1617  // Loop breakInitRegs are init before entering the loop.
1618  for (typename std::set<RegiT>::const_iterator iter =
1619         loopLand->breakInitRegs.begin(),
1620       iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
1621  {
1622    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1623  }
1624  // Loop endbranchInitRegs are init before entering the loop.
1625  for (typename std::set<RegiT>::const_iterator iter =
1626         loopLand->endbranchInitRegs.begin(),
1627       iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
1628    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
1629  }
1630
1631  /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
1632   * search for the DebugLoc in the continue statement.
1633   * if not found, we have to insert the empty/default DebugLoc */
1634  InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
1635  DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
1636
1637  CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
1638  // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
1639  // loop.
1640  for (typename std::set<RegiT>::const_iterator iter =
1641         loopLand->breakOnRegs.begin(),
1642       iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
1643    CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
1644                                   *iter);
1645  }
1646
1647  // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
1648  // loop.
1649  for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
1650       iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
1651    CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
1652                                   passRep, *iter);
1653  }
1654
1655  dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
1656
1657  for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
1658       iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
1659    dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of.
1660  }
1661
1662  removeSuccessor(landBlk);
1663  retireBlock(dstBlk, landBlk);
1664} //mergeLooplandBlock
1665
1666template<class PassT>
1667void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
1668                                                 BlockT *exitBlk,
1669                                                 BlockT *exitLandBlk,
1670                                                 RegiT  setReg) {
1671  if (DEBUGME) {
1672    errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
1673           << " exit = BB" << exitBlk->getNumber()
1674           << " land = BB" << exitLandBlk->getNumber() << "\n";
1675  }
1676
1677  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
1678  assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
1679
1680  DebugLoc DL = branchInstr->getDebugLoc();
1681
1682  BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1683  int oldOpcode = branchInstr->getOpcode();
1684
1685  //    transform exitingBlk to
1686  //    if ( ) {
1687  //       exitBlk (if exitBlk != exitLandBlk)
1688  //       setReg = 1
1689  //       break
1690  //    }endif
1691  //    successor = {orgSuccessor(exitingBlk) - exitBlk}
1692
1693  typename BlockT::iterator branchInstrPos =
1694    CFGTraits::getInstrPos(exitingBlk, branchInstr);
1695
1696  if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
1697    //break_logical
1698    int newOpcode =
1699    (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
1700                            : CFGTraits::getBreakZeroOpcode(oldOpcode);
1701    CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1702  } else {
1703    int newOpcode =
1704    (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1705                            : CFGTraits::getBranchZeroOpcode(oldOpcode);
1706    CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
1707    if (exitBlk != exitLandBlk) {
1708      //splice is insert-before ...
1709      exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
1710                         exitBlk->end());
1711    }
1712    if (setReg != INVALIDREGNUM) {
1713      CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1714    }
1715    CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
1716    CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
1717  } //if_logical
1718
1719  //now branchInst can be erase safely
1720  //exitingBlk->eraseFromParent(branchInstr);
1721  branchInstr->eraseFromParent();
1722
1723  //now take care of successors, retire blocks
1724  exitingBlk->removeSuccessor(exitBlk);
1725  if (exitBlk != exitLandBlk) {
1726    //splice is insert-before ...
1727    exitBlk->removeSuccessor(exitLandBlk);
1728    retireBlock(exitingBlk, exitBlk);
1729  }
1730
1731} //mergeLoopbreakBlock
1732
1733template<class PassT>
1734void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
1735                                                 BlockT *contBlk,
1736                                                 RegiT   setReg) {
1737  if (DEBUGME) {
1738    errs() << "settleLoopcontBlock conting = BB"
1739           << contingBlk->getNumber()
1740           << ", cont = BB" << contBlk->getNumber() << "\n";
1741  }
1742
1743  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
1744  if (branchInstr) {
1745    assert(CFGTraits::isCondBranch(branchInstr));
1746    typename BlockT::iterator branchInstrPos =
1747      CFGTraits::getInstrPos(contingBlk, branchInstr);
1748    BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
1749    int oldOpcode = branchInstr->getOpcode();
1750	DebugLoc DL = branchInstr->getDebugLoc();
1751
1752    //    transform contingBlk to
1753    //     if () {
1754    //          move instr after branchInstr
1755    //          continue
1756    //        or
1757    //          setReg = 1
1758    //          break
1759    //     }endif
1760    //     successor = {orgSuccessor(contingBlk) - loopHeader}
1761
1762    bool useContinueLogical =
1763      (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
1764
1765    if (useContinueLogical == false)
1766    {
1767      int branchOpcode =
1768        trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
1769                              : CFGTraits::getBranchZeroOpcode(oldOpcode);
1770
1771      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1772
1773      if (setReg != INVALIDREGNUM) {
1774        CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
1775        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1776        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
1777      } else {
1778        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1779        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
1780      }
1781
1782      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
1783    } else {
1784      int branchOpcode =
1785        trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
1786                              : CFGTraits::getContinueZeroOpcode(oldOpcode);
1787
1788      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
1789    }
1790
1791    //contingBlk->eraseFromParent(branchInstr);
1792    branchInstr->eraseFromParent();
1793  } else {
1794    /* if we've arrived here then we've already erased the branch instruction
1795	 * travel back up the basic block to see the last reference of our debug location
1796	 * we've just inserted that reference here so it should be representative */
1797    if (setReg != INVALIDREGNUM) {
1798      CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
1799      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1800      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1801    } else {
1802      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1803      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
1804    }
1805  } //else
1806
1807} //settleLoopcontBlock
1808
1809// BBs in exitBlkSet are determined as in break-path for loopRep,
1810// before we can put code for BBs as inside loop-body for loopRep
1811// check whether those BBs are determined as cont-BB for parentLoopRep
1812// earlier.
1813// If so, generate a new BB newBlk
1814//    (1) set newBlk common successor of BBs in exitBlkSet
1815//    (2) change the continue-instr in BBs in exitBlkSet to break-instr
1816//    (3) generate continue-instr in newBlk
1817//
1818template<class PassT>
1819typename CFGStructurizer<PassT>::BlockT *
1820CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
1821                                              LoopT *loopRep,
1822                                              std::set<BlockT *> &exitBlkSet,
1823                                              BlockT *exitLandBlk) {
1824  std::set<BlockT *> endBlkSet;
1825
1826//  BlockT *parentLoopHead = parentLoopRep->getHeader();
1827
1828
1829  for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
1830       iterEnd = exitBlkSet.end();
1831       iter != iterEnd; ++iter) {
1832    BlockT *exitBlk = *iter;
1833    BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
1834
1835    if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
1836      return NULL;
1837
1838    endBlkSet.insert(endBlk);
1839  }
1840
1841  BlockT *newBlk = funcRep->CreateMachineBasicBlock();
1842  funcRep->push_back(newBlk);  //insert to function
1843  CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
1844  SHOWNEWBLK(newBlk, "New continue block: ");
1845
1846  for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
1847       iterEnd = endBlkSet.end();
1848       iter != iterEnd; ++iter) {
1849      BlockT *endBlk = *iter;
1850      InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
1851      if (contInstr) {
1852        contInstr->eraseFromParent();
1853      }
1854      endBlk->addSuccessor(newBlk);
1855      if (DEBUGME) {
1856        errs() << "Add new continue Block to BB"
1857               << endBlk->getNumber() << " successors\n";
1858      }
1859  }
1860
1861  return newBlk;
1862} //relocateLoopcontBlock
1863
1864
1865// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
1866// LoopLandBlock. This BB branch on the loop endBranchInit register to the
1867// pathes corresponding to the loop exiting branches.
1868
1869template<class PassT>
1870typename CFGStructurizer<PassT>::BlockT *
1871CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
1872                                              BlockTSmallerVector &exitingBlks,
1873                                              BlockTSmallerVector &exitBlks) {
1874  const AMDILInstrInfo *tii =
1875             static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
1876  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1877
1878  RegiT endBranchReg = static_cast<int>
1879    (funcRep->getRegInfo().createVirtualRegister(I32RC));
1880  assert(endBranchReg >= 0);
1881
1882  // reg = 0 before entering the loop
1883  addLoopEndbranchInitReg(loopRep, endBranchReg);
1884
1885  uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
1886  assert(numBlks >=2 && numBlks == exitBlks.size());
1887
1888  BlockT *preExitingBlk = exitingBlks[0];
1889  BlockT *preExitBlk = exitBlks[0];
1890  BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
1891  funcRep->push_back(preBranchBlk);  //insert to function
1892  SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
1893
1894  BlockT *newLandBlk = preBranchBlk;
1895
1896      CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
1897        newLandBlk);
1898  preExitingBlk->removeSuccessor(preExitBlk);
1899  preExitingBlk->addSuccessor(newLandBlk);
1900
1901  //it is redundant to add reg = 0 to exitingBlks[0]
1902
1903  // For 1..n th exiting path (the last iteration handles two pathes) create the
1904  // branch to the previous path and the current path.
1905  for (uint32_t i = 1; i < numBlks; ++i) {
1906    BlockT *curExitingBlk = exitingBlks[i];
1907    BlockT *curExitBlk = exitBlks[i];
1908    BlockT *curBranchBlk;
1909
1910    if (i == numBlks - 1) {
1911      curBranchBlk = curExitBlk;
1912    } else {
1913      curBranchBlk = funcRep->CreateMachineBasicBlock();
1914      funcRep->push_back(curBranchBlk);  //insert to function
1915      SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
1916    }
1917
1918    // Add reg = i to exitingBlks[i].
1919    CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
1920                                       endBranchReg, i);
1921
1922    // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
1923    // (exitingBlks[i], newLandBlk).
1924    CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
1925                                          newLandBlk);
1926    curExitingBlk->removeSuccessor(curExitBlk);
1927    curExitingBlk->addSuccessor(newLandBlk);
1928
1929    // add to preBranchBlk the branch instruction:
1930    // if (endBranchReg == preVal)
1931    //    preExitBlk
1932    // else
1933    //    curBranchBlk
1934    //
1935    // preValReg = i - 1
1936
1937  DebugLoc DL;
1938  RegiT preValReg = static_cast<int>
1939    (funcRep->getRegInfo().createVirtualRegister(I32RC));
1940
1941  preBranchBlk->insert(preBranchBlk->begin(),
1942                       tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
1943                       i - 1));
1944
1945  // condResReg = (endBranchReg == preValReg)
1946    RegiT condResReg = static_cast<int>
1947      (funcRep->getRegInfo().createVirtualRegister(I32RC));
1948    BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
1949      .addReg(endBranchReg).addReg(preValReg);
1950
1951    BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
1952      .addMBB(preExitBlk).addReg(condResReg);
1953
1954    preBranchBlk->addSuccessor(preExitBlk);
1955    preBranchBlk->addSuccessor(curBranchBlk);
1956
1957    // Update preExitingBlk, preExitBlk, preBranchBlk.
1958    preExitingBlk = curExitingBlk;
1959    preExitBlk = curExitBlk;
1960    preBranchBlk = curBranchBlk;
1961
1962  }  //end for 1 .. n blocks
1963
1964  return newLandBlk;
1965} //addLoopEndbranchBlock
1966
1967template<class PassT>
1968typename CFGStructurizer<PassT>::PathToKind
1969CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
1970                                     bool allowSideEntry) {
1971  assert(dstBlk);
1972
1973  if (srcBlk == dstBlk) {
1974    return SinglePath_InPath;
1975  }
1976
1977  while (srcBlk && srcBlk->succ_size() == 1) {
1978    srcBlk = *srcBlk->succ_begin();
1979    if (srcBlk == dstBlk) {
1980      return SinglePath_InPath;
1981    }
1982
1983    if (!allowSideEntry && srcBlk->pred_size() > 1) {
1984      return Not_SinglePath;
1985    }
1986  }
1987
1988  if (srcBlk && srcBlk->succ_size()==0) {
1989    return SinglePath_NotInPath;
1990  }
1991
1992  return Not_SinglePath;
1993} //singlePathTo
1994
1995// If there is a single path from srcBlk to dstBlk, return the last block before
1996// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
1997// last block in the path Otherwise, return NULL
1998template<class PassT>
1999typename CFGStructurizer<PassT>::BlockT *
2000CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
2001                                      bool allowSideEntry) {
2002  assert(dstBlk);
2003
2004  if (srcBlk == dstBlk) {
2005    return srcBlk;
2006  }
2007
2008  if (srcBlk->succ_size() == 0) {
2009    return srcBlk;
2010  }
2011
2012  while (srcBlk && srcBlk->succ_size() == 1) {
2013    BlockT *preBlk = srcBlk;
2014
2015    srcBlk = *srcBlk->succ_begin();
2016    if (srcBlk == NULL) {
2017      return preBlk;
2018    }
2019
2020    if (!allowSideEntry && srcBlk->pred_size() > 1) {
2021      return NULL;
2022    }
2023  }
2024
2025  if (srcBlk && srcBlk->succ_size()==0) {
2026    return srcBlk;
2027  }
2028
2029  return NULL;
2030
2031} //singlePathEnd
2032
2033template<class PassT>
2034int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
2035                                               BlockT *dstBlk) {
2036  int cloned = 0;
2037  assert(preBlk->isSuccessor(srcBlk));
2038  while (srcBlk && srcBlk != dstBlk) {
2039    assert(srcBlk->succ_size() == 1);
2040    if (srcBlk->pred_size() > 1) {
2041      srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
2042      ++cloned;
2043    }
2044
2045    preBlk = srcBlk;
2046    srcBlk = *srcBlk->succ_begin();
2047  }
2048
2049  return cloned;
2050} //cloneOnSideEntryTo
2051
2052template<class PassT>
2053typename CFGStructurizer<PassT>::BlockT *
2054CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
2055                                                 BlockT *predBlk) {
2056  assert(predBlk->isSuccessor(curBlk) &&
2057         "succBlk is not a prececessor of curBlk");
2058
2059  BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
2060  CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
2061  //srcBlk, oldBlk, newBlk
2062
2063  predBlk->removeSuccessor(curBlk);
2064  predBlk->addSuccessor(cloneBlk);
2065
2066  // add all successor to cloneBlk
2067  CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
2068
2069  numClonedInstr += curBlk->size();
2070
2071  if (DEBUGME) {
2072    errs() << "Cloned block: " << "BB"
2073           << curBlk->getNumber() << "size " << curBlk->size() << "\n";
2074  }
2075
2076  SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
2077
2078  return cloneBlk;
2079} //cloneBlockForPredecessor
2080
2081template<class PassT>
2082typename CFGStructurizer<PassT>::BlockT *
2083CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
2084                                               BlockT *exitingBlk) {
2085  BlockT *exitBlk = NULL;
2086
2087  for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
2088       iterSuccEnd = exitingBlk->succ_end();
2089       iterSucc != iterSuccEnd; ++iterSucc) {
2090    BlockT *curBlk = *iterSucc;
2091    if (!loopRep->contains(curBlk)) {
2092      assert(exitBlk == NULL);
2093      exitBlk = curBlk;
2094    }
2095  }
2096
2097  assert(exitBlk != NULL);
2098
2099  return exitBlk;
2100} //exitingBlock2ExitBlock
2101
2102template<class PassT>
2103void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
2104                                                BlockT *dstBlk,
2105                                                InstrIterator insertPos) {
2106  InstrIterator spliceEnd;
2107  //look for the input branchinstr, not the AMDIL branchinstr
2108  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2109  if (branchInstr == NULL) {
2110    if (DEBUGME) {
2111      errs() << "migrateInstruction don't see branch instr\n" ;
2112    }
2113    spliceEnd = srcBlk->end();
2114  } else {
2115    if (DEBUGME) {
2116      errs() << "migrateInstruction see branch instr\n" ;
2117      branchInstr->dump();
2118    }
2119    spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
2120  }
2121  if (DEBUGME) {
2122    errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
2123      << "srcSize = " << srcBlk->size() << "\n";
2124  }
2125
2126  //splice insert before insertPos
2127  dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
2128
2129  if (DEBUGME) {
2130    errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
2131      << "srcSize = " << srcBlk->size() << "\n";
2132  }
2133} //migrateInstruction
2134
2135// normalizeInfiniteLoopExit change
2136//   B1:
2137//        uncond_br LoopHeader
2138//
2139// to
2140//   B1:
2141//        cond_br 1 LoopHeader dummyExit
2142// and return the newly added dummy exit block
2143//
2144template<class PassT>
2145typename CFGStructurizer<PassT>::BlockT *
2146CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
2147  BlockT *loopHeader;
2148  BlockT *loopLatch;
2149  loopHeader = LoopRep->getHeader();
2150  loopLatch = LoopRep->getLoopLatch();
2151  BlockT *dummyExitBlk = NULL;
2152  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
2153  if (loopHeader!=NULL && loopLatch!=NULL) {
2154    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
2155    if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
2156      dummyExitBlk = funcRep->CreateMachineBasicBlock();
2157      funcRep->push_back(dummyExitBlk);  //insert to function
2158      SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
2159
2160      if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
2161
2162      typename BlockT::iterator insertPos =
2163        CFGTraits::getInstrPos(loopLatch, branchInstr);
2164      unsigned immReg =
2165        funcRep->getRegInfo().createVirtualRegister(I32RC);
2166      CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
2167      InstrT *newInstr =
2168        CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
2169      MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
2170
2171      SHOWNEWINSTR(newInstr);
2172
2173      branchInstr->eraseFromParent();
2174      loopLatch->addSuccessor(dummyExitBlk);
2175    }
2176  }
2177
2178  return dummyExitBlk;
2179} //normalizeInfiniteLoopExit
2180
2181template<class PassT>
2182void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
2183  InstrT *branchInstr;
2184
2185  // I saw two unconditional branch in one basic block in example
2186  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
2187  while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
2188          && CFGTraits::isUncondBranch(branchInstr)) {
2189    if (DEBUGME) {
2190          errs() << "Removing unconditional branch instruction" ;
2191      branchInstr->dump();
2192    }
2193    branchInstr->eraseFromParent();
2194  }
2195} //removeUnconditionalBranch
2196
2197template<class PassT>
2198void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
2199  if (srcBlk->succ_size() == 2) {
2200    BlockT *blk1 = *srcBlk->succ_begin();
2201    BlockT *blk2 = *(++srcBlk->succ_begin());
2202
2203    if (blk1 == blk2) {
2204      InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
2205      assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
2206      if (DEBUGME) {
2207        errs() << "Removing unneeded conditional branch instruction" ;
2208        branchInstr->dump();
2209      }
2210      branchInstr->eraseFromParent();
2211      SHOWNEWBLK(blk1, "Removing redundant successor");
2212      srcBlk->removeSuccessor(blk1);
2213    }
2214  }
2215} //removeRedundantConditionalBranch
2216
2217template<class PassT>
2218void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
2219                                               DEFAULT_VEC_SLOTS> &retBlks) {
2220  BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
2221  funcRep->push_back(dummyExitBlk);  //insert to function
2222  CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
2223
2224  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
2225         retBlks.begin(),
2226       iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
2227    BlockT *curBlk = *iter;
2228    InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
2229    if (curInstr) {
2230      curInstr->eraseFromParent();
2231    }
2232#if 0
2233    if (curBlk->size()==0 && curBlk->pred_size() == 1) {
2234      if (DEBUGME) {
2235        errs() << "Replace empty block BB" <<  curBlk->getNumber()
2236          << " with dummyExitBlock\n";
2237      }
2238      BlockT *predb = *curBlk->pred_begin();
2239      predb->removeSuccessor(curBlk);
2240      curBlk = predb;
2241    } //handle empty curBlk
2242#endif
2243    curBlk->addSuccessor(dummyExitBlk);
2244    if (DEBUGME) {
2245      errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
2246             << " successors\n";
2247    }
2248  } //for
2249
2250  SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
2251} //addDummyExitBlock
2252
2253template<class PassT>
2254void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
2255  while (srcBlk->succ_size()) {
2256    srcBlk->removeSuccessor(*srcBlk->succ_begin());
2257  }
2258}
2259
2260template<class PassT>
2261void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
2262  BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2263
2264  if (srcBlkInfo == NULL) {
2265    srcBlkInfo = new BlockInfo();
2266  }
2267
2268  srcBlkInfo->sccNum = sccNum;
2269}
2270
2271template<class PassT>
2272int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
2273  BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2274  return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
2275}
2276
2277template<class PassT>
2278void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
2279  if (DEBUGME) {
2280        errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
2281  }
2282
2283  BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
2284
2285  if (srcBlkInfo == NULL) {
2286    srcBlkInfo = new BlockInfo();
2287  }
2288
2289  srcBlkInfo->isRetired = true;
2290  //int i = srcBlk->succ_size();
2291  //int j = srcBlk->pred_size();
2292  assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
2293         && "can't retire block yet");
2294}
2295
2296template<class PassT>
2297bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
2298  BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
2299  return (srcBlkInfo && srcBlkInfo->isRetired);
2300}
2301
2302template<class PassT>
2303bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
2304  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2305  while (loopRep && loopRep->getHeader() == curBlk) {
2306    LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
2307
2308    if(loopLand == NULL)
2309      return true;
2310
2311    BlockT *landBlk = loopLand->landBlk;
2312    assert(landBlk);
2313    if (!isRetiredBlock(landBlk)) {
2314      return true;
2315    }
2316
2317    loopRep = loopRep->getParentLoop();
2318  }
2319
2320  return false;
2321} //isActiveLoophead
2322
2323template<class PassT>
2324bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
2325  const unsigned blockSizeThreshold = 30;
2326  const unsigned cloneInstrThreshold = 100;
2327
2328  bool multiplePreds = blk && (blk->pred_size() > 1);
2329
2330  if(!multiplePreds)
2331    return false;
2332
2333  unsigned blkSize = blk->size();
2334  return ((blkSize > blockSizeThreshold)
2335          && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
2336} //needMigrateBlock
2337
2338template<class PassT>
2339typename CFGStructurizer<PassT>::BlockT *
2340CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
2341                                            BlockTSmallerVector &exitBlks,
2342                                            std::set<BlockT *> &exitBlkSet) {
2343  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks;  //in exit path blocks
2344
2345  for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
2346       predIterEnd = landBlk->pred_end();
2347       predIter != predIterEnd; ++predIter) {
2348    BlockT *curBlk = *predIter;
2349    if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
2350      inpathBlks.push_back(curBlk);
2351    }
2352  } //for
2353
2354  //if landBlk has predecessors that are not in the given loop,
2355  //create a new block
2356  BlockT *newLandBlk = landBlk;
2357  if (inpathBlks.size() != landBlk->pred_size()) {
2358    newLandBlk = funcRep->CreateMachineBasicBlock();
2359    funcRep->push_back(newLandBlk);  //insert to function
2360    newLandBlk->addSuccessor(landBlk);
2361    for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
2362         inpathBlks.begin(),
2363         iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
2364      BlockT *curBlk = *iter;
2365      CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
2366      //srcBlk, oldBlk, newBlk
2367      curBlk->removeSuccessor(landBlk);
2368      curBlk->addSuccessor(newLandBlk);
2369    }
2370    for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
2371      if (exitBlks[i] == landBlk) {
2372        exitBlks[i] = newLandBlk;
2373      }
2374    }
2375    SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
2376  }
2377
2378  setLoopLandBlock(loopRep, newLandBlk);
2379
2380  return newLandBlk;
2381} // recordLoopbreakLand
2382
2383template<class PassT>
2384void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
2385  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2386
2387  if (theEntry == NULL) {
2388    theEntry = new LoopLandInfo();
2389  }
2390  assert(theEntry->landBlk == NULL);
2391
2392  if (blk == NULL) {
2393    blk = funcRep->CreateMachineBasicBlock();
2394    funcRep->push_back(blk);  //insert to function
2395    SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
2396  }
2397
2398  theEntry->landBlk = blk;
2399
2400  if (DEBUGME) {
2401    errs() << "setLoopLandBlock loop-header = BB"
2402           << loopRep->getHeader()->getNumber()
2403           << "  landing-block = BB" << blk->getNumber() << "\n";
2404  }
2405} // setLoopLandBlock
2406
2407template<class PassT>
2408void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
2409  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2410
2411  if (theEntry == NULL) {
2412    theEntry = new LoopLandInfo();
2413  }
2414
2415  theEntry->breakOnRegs.insert(regNum);
2416
2417  if (DEBUGME) {
2418    errs() << "addLoopBreakOnReg loop-header = BB"
2419           << loopRep->getHeader()->getNumber()
2420           << "  regNum = " << regNum << "\n";
2421  }
2422} // addLoopBreakOnReg
2423
2424template<class PassT>
2425void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
2426  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2427
2428  if (theEntry == NULL) {
2429    theEntry = new LoopLandInfo();
2430  }
2431  theEntry->contOnRegs.insert(regNum);
2432
2433  if (DEBUGME) {
2434    errs() << "addLoopContOnReg loop-header = BB"
2435           << loopRep->getHeader()->getNumber()
2436           << "  regNum = " << regNum << "\n";
2437  }
2438} // addLoopContOnReg
2439
2440template<class PassT>
2441void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
2442  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2443
2444  if (theEntry == NULL) {
2445    theEntry = new LoopLandInfo();
2446  }
2447  theEntry->breakInitRegs.insert(regNum);
2448
2449  if (DEBUGME) {
2450    errs() << "addLoopBreakInitReg loop-header = BB"
2451           << loopRep->getHeader()->getNumber()
2452           << "  regNum = " << regNum << "\n";
2453  }
2454} // addLoopBreakInitReg
2455
2456template<class PassT>
2457void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
2458  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2459
2460  if (theEntry == NULL) {
2461    theEntry = new LoopLandInfo();
2462  }
2463  theEntry->contInitRegs.insert(regNum);
2464
2465  if (DEBUGME) {
2466    errs() << "addLoopContInitReg loop-header = BB"
2467           << loopRep->getHeader()->getNumber()
2468           << "  regNum = " << regNum << "\n";
2469  }
2470} // addLoopContInitReg
2471
2472template<class PassT>
2473void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
2474                                                     RegiT regNum) {
2475  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2476
2477  if (theEntry == NULL) {
2478    theEntry = new LoopLandInfo();
2479  }
2480  theEntry->endbranchInitRegs.insert(regNum);
2481
2482  if (DEBUGME)
2483  {
2484        errs() << "addLoopEndbranchInitReg loop-header = BB"
2485      << loopRep->getHeader()->getNumber()
2486      << "  regNum = " << regNum << "\n";
2487  }
2488} // addLoopEndbranchInitReg
2489
2490template<class PassT>
2491typename CFGStructurizer<PassT>::LoopLandInfo *
2492CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
2493  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2494
2495  return theEntry;
2496} // getLoopLandInfo
2497
2498template<class PassT>
2499typename CFGStructurizer<PassT>::BlockT *
2500CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
2501  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
2502
2503  return theEntry ? theEntry->landBlk : NULL;
2504} // getLoopLandBlock
2505
2506
2507template<class PassT>
2508bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
2509  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
2510  if (loopRep == NULL)
2511    return false;
2512
2513  BlockT *loopHeader = loopRep->getHeader();
2514
2515  return curBlk->isSuccessor(loopHeader);
2516
2517} //hasBackEdge
2518
2519template<class PassT>
2520unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
2521  return loopRep ? loopRep->getLoopDepth() : 0;
2522} //getLoopDepth
2523
2524template<class PassT>
2525int CFGStructurizer<PassT>::countActiveBlock
2526(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
2527 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
2528  int count = 0;
2529  while (iterStart != iterEnd) {
2530    if (!isRetiredBlock(*iterStart)) {
2531      ++count;
2532    }
2533    ++iterStart;
2534  }
2535
2536  return count;
2537} //countActiveBlock
2538
2539// This is work around solution for findNearestCommonDominator not avaiable to
2540// post dom a proper fix should go to Dominators.h.
2541
2542template<class PassT>
2543typename CFGStructurizer<PassT>::BlockT*
2544CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
2545
2546  if (postDomTree->dominates(blk1, blk2)) {
2547    return blk1;
2548  }
2549  if (postDomTree->dominates(blk2, blk1)) {
2550    return blk2;
2551  }
2552
2553  DomTreeNodeT *node1 = postDomTree->getNode(blk1);
2554  DomTreeNodeT *node2 = postDomTree->getNode(blk2);
2555
2556  // Handle newly cloned node.
2557  if (node1 == NULL && blk1->succ_size() == 1) {
2558    return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
2559  }
2560  if (node2 == NULL && blk2->succ_size() == 1) {
2561    return findNearestCommonPostDom(blk1, *blk2->succ_begin());
2562  }
2563
2564  if (node1 == NULL || node2 == NULL) {
2565    return NULL;
2566  }
2567
2568  node1 = node1->getIDom();
2569  while (node1) {
2570    if (postDomTree->dominates(node1, node2)) {
2571      return node1->getBlock();
2572    }
2573    node1 = node1->getIDom();
2574  }
2575
2576  return NULL;
2577}
2578
2579template<class PassT>
2580typename CFGStructurizer<PassT>::BlockT *
2581CFGStructurizer<PassT>::findNearestCommonPostDom
2582(typename std::set<BlockT *> &blks) {
2583  BlockT *commonDom;
2584  typename std::set<BlockT *>::const_iterator iter = blks.begin();
2585  typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
2586  for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
2587    BlockT *curBlk = *iter;
2588    if (curBlk != commonDom) {
2589      commonDom = findNearestCommonPostDom(curBlk, commonDom);
2590    }
2591  }
2592
2593  if (DEBUGME) {
2594    errs() << "Common post dominator for exit blocks is ";
2595    if (commonDom) {
2596          errs() << "BB" << commonDom->getNumber() << "\n";
2597    } else {
2598      errs() << "NULL\n";
2599    }
2600  }
2601
2602  return commonDom;
2603} //findNearestCommonPostDom
2604
2605} //end namespace llvm
2606
2607//todo: move-end
2608
2609
2610//===----------------------------------------------------------------------===//
2611//
2612// CFGStructurizer for AMDIL
2613//
2614//===----------------------------------------------------------------------===//
2615
2616
2617using namespace llvmCFGStruct;
2618
2619namespace llvm
2620{
2621class AMDILCFGStructurizer : public MachineFunctionPass
2622{
2623public:
2624  typedef MachineInstr              InstructionType;
2625  typedef MachineFunction           FunctionType;
2626  typedef MachineBasicBlock         BlockType;
2627  typedef MachineLoopInfo           LoopinfoType;
2628  typedef MachineDominatorTree      DominatortreeType;
2629  typedef MachinePostDominatorTree  PostDominatortreeType;
2630  typedef MachineDomTreeNode        DomTreeNodeType;
2631  typedef MachineLoop               LoopType;
2632
2633protected:
2634  TargetMachine &TM;
2635  const TargetInstrInfo *TII;
2636  const AMDGPURegisterInfo *TRI;
2637
2638public:
2639  AMDILCFGStructurizer(char &pid, TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
2640  const TargetInstrInfo *getTargetInstrInfo() const;
2641  //bool runOnMachineFunction(MachineFunction &F);
2642
2643private:
2644
2645};   //end of class AMDILCFGStructurizer
2646
2647//char AMDILCFGStructurizer::ID = 0;
2648} //end of namespace llvm
2649AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid, TargetMachine &tm
2650                                           AMDIL_OPT_LEVEL_DECL)
2651: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
2652  TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())
2653  ) {
2654}
2655
2656const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const {
2657  return TII;
2658}
2659//===----------------------------------------------------------------------===//
2660//
2661// CFGPrepare
2662//
2663//===----------------------------------------------------------------------===//
2664
2665
2666using namespace llvmCFGStruct;
2667
2668namespace llvm
2669{
2670class AMDILCFGPrepare : public AMDILCFGStructurizer
2671{
2672public:
2673  static char ID;
2674
2675public:
2676  AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
2677
2678  virtual const char *getPassName() const;
2679  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2680
2681  bool runOnMachineFunction(MachineFunction &F);
2682
2683private:
2684
2685};   //end of class AMDILCFGPrepare
2686
2687char AMDILCFGPrepare::ID = 0;
2688} //end of namespace llvm
2689
2690AMDILCFGPrepare::AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
2691  : AMDILCFGStructurizer(ID, tm  AMDIL_OPT_LEVEL_VAR)
2692{
2693}
2694const char *AMDILCFGPrepare::getPassName() const {
2695  return "AMD IL Control Flow Graph Preparation Pass";
2696}
2697
2698void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
2699  AU.addPreserved<MachineFunctionAnalysis>();
2700  AU.addRequired<MachineFunctionAnalysis>();
2701  AU.addRequired<MachineDominatorTree>();
2702  AU.addRequired<MachinePostDominatorTree>();
2703  AU.addRequired<MachineLoopInfo>();
2704}
2705
2706//===----------------------------------------------------------------------===//
2707//
2708// CFGPerform
2709//
2710//===----------------------------------------------------------------------===//
2711
2712
2713using namespace llvmCFGStruct;
2714
2715namespace llvm
2716{
2717class AMDILCFGPerform : public AMDILCFGStructurizer
2718{
2719public:
2720  static char ID;
2721
2722public:
2723  AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
2724  virtual const char *getPassName() const;
2725  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
2726  bool runOnMachineFunction(MachineFunction &F);
2727
2728private:
2729
2730};   //end of class AMDILCFGPerform
2731
2732char AMDILCFGPerform::ID = 0;
2733} //end of namespace llvm
2734
2735  AMDILCFGPerform::AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
2736: AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR)
2737{
2738}
2739
2740const char *AMDILCFGPerform::getPassName() const {
2741  return "AMD IL Control Flow Graph structurizer Pass";
2742}
2743
2744void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
2745  AU.addPreserved<MachineFunctionAnalysis>();
2746  AU.addRequired<MachineFunctionAnalysis>();
2747  AU.addRequired<MachineDominatorTree>();
2748  AU.addRequired<MachinePostDominatorTree>();
2749  AU.addRequired<MachineLoopInfo>();
2750}
2751
2752//===----------------------------------------------------------------------===//
2753//
2754// CFGStructTraits<AMDILCFGStructurizer>
2755//
2756//===----------------------------------------------------------------------===//
2757
2758namespace llvmCFGStruct
2759{
2760// this class is tailor to the AMDIL backend
2761template<>
2762struct CFGStructTraits<AMDILCFGStructurizer>
2763{
2764  typedef int RegiT;
2765
2766  static int getBreakNzeroOpcode(int oldOpcode) {
2767    switch(oldOpcode) {
2768      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ);
2769    default:
2770      assert(0 && "internal error");
2771    };
2772    return -1;
2773  }
2774
2775  static int getBreakZeroOpcode(int oldOpcode) {
2776    switch(oldOpcode) {
2777      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ);
2778    default:
2779      assert(0 && "internal error");
2780    };
2781    return -1;
2782  }
2783
2784  static int getBranchNzeroOpcode(int oldOpcode) {
2785    switch(oldOpcode) {
2786      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
2787      case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
2788    default:
2789      assert(0 && "internal error");
2790    };
2791    return -1;
2792  }
2793
2794  static int getBranchZeroOpcode(int oldOpcode) {
2795    switch(oldOpcode) {
2796      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
2797      case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
2798    default:
2799      assert(0 && "internal error");
2800    };
2801    return -1;
2802  }
2803
2804  static int getContinueNzeroOpcode(int oldOpcode)
2805  {
2806    switch(oldOpcode) {
2807      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ);
2808      default:
2809        assert(0 && "internal error");
2810    };
2811    return -1;
2812  }
2813
2814  static int getContinueZeroOpcode(int oldOpcode) {
2815    switch(oldOpcode) {
2816      ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ);
2817    default:
2818      assert(0 && "internal error");
2819    };
2820    return -1;
2821  }
2822
2823// the explicitly represented branch target is the true branch target
2824#define getExplicitBranch getTrueBranch
2825#define setExplicitBranch setTrueBranch
2826
2827  static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
2828    return instr->getOperand(0).getMBB();
2829  }
2830
2831  static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
2832    instr->getOperand(0).setMBB(blk);
2833  }
2834
2835  static MachineBasicBlock *
2836  getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
2837    assert(blk->succ_size() == 2);
2838    MachineBasicBlock *trueBranch = getTrueBranch(instr);
2839    MachineBasicBlock::succ_iterator iter = blk->succ_begin();
2840    MachineBasicBlock::succ_iterator iterNext = iter;
2841    ++iterNext;
2842
2843    return (*iter == trueBranch) ? *iterNext : *iter;
2844  }
2845
2846  static bool isCondBranch(MachineInstr *instr) {
2847    switch (instr->getOpcode()) {
2848      ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
2849      case AMDGPU::SI_IF_NZ:
2850      case AMDGPU::SI_IF_Z:
2851      break;
2852    default:
2853      return false;
2854    }
2855    return true;
2856  }
2857
2858  static bool isUncondBranch(MachineInstr *instr) {
2859    switch (instr->getOpcode()) {
2860    case AMDGPU::BRANCH:
2861      break;
2862    default:
2863      return false;
2864    }
2865    return true;
2866  }
2867
2868  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
2869    //get DebugLoc from the first MachineBasicBlock instruction with debug info
2870    DebugLoc DL;
2871	for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
2872	  MachineInstr *instr = &(*iter);
2873	  if (instr->getDebugLoc().isUnknown() == false) {
2874	    DL = instr->getDebugLoc();
2875	  }
2876    }
2877    return DL;
2878  }
2879
2880  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
2881    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2882    MachineInstr *instr = &*iter;
2883    if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
2884      return instr;
2885    }
2886    return NULL;
2887  }
2888
2889  // The correct naming for this is getPossibleLoopendBlockBranchInstr.
2890  //
2891  // BB with backward-edge could have move instructions after the branch
2892  // instruction.  Such move instruction "belong to" the loop backward-edge.
2893  //
2894  static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
2895    const AMDILInstrInfo * TII = static_cast<const AMDILInstrInfo *>(
2896                                  blk->getParent()->getTarget().getInstrInfo());
2897
2898    for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
2899         iterEnd = blk->rend(); iter != iterEnd; ++iter) {
2900      // FIXME: Simplify
2901      MachineInstr *instr = &*iter;
2902      if (instr) {
2903        if (isCondBranch(instr) || isUncondBranch(instr)) {
2904          return instr;
2905        } else if (!TII->isMov(instr->getOpcode())) {
2906          break;
2907        }
2908      }
2909    }
2910    return NULL;
2911  }
2912
2913  static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
2914    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2915    if (iter != blk->rend()) {
2916      MachineInstr *instr = &(*iter);
2917      if (instr->getOpcode() == AMDGPU::RETURN) {
2918        return instr;
2919      }
2920    }
2921    return NULL;
2922  }
2923
2924  static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
2925    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
2926    if (iter != blk->rend()) {
2927      MachineInstr *instr = &(*iter);
2928      if (instr->getOpcode() == AMDGPU::CONTINUE) {
2929        return instr;
2930      }
2931    }
2932    return NULL;
2933  }
2934
2935  static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
2936    for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
2937      MachineInstr *instr = &(*iter);
2938      if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) {
2939        return instr;
2940      }
2941    }
2942    return NULL;
2943  }
2944
2945  static bool isReturnBlock(MachineBasicBlock *blk) {
2946    MachineInstr *instr = getReturnInstr(blk);
2947    bool isReturn = (blk->succ_size() == 0);
2948    if (instr) {
2949      assert(isReturn);
2950    } else if (isReturn) {
2951      if (DEBUGME) {
2952        errs() << "BB" << blk->getNumber()
2953               <<" is return block without RETURN instr\n";
2954      }
2955    }
2956
2957    return  isReturn;
2958  }
2959
2960  static MachineBasicBlock::iterator
2961  getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
2962    assert(instr->getParent() == blk && "instruction doesn't belong to block");
2963    MachineBasicBlock::iterator iter = blk->begin();
2964    MachineBasicBlock::iterator iterEnd = blk->end();
2965    while (&(*iter) != instr && iter != iterEnd) {
2966      ++iter;
2967    }
2968
2969    assert(iter != iterEnd);
2970    return iter;
2971  }//getInstrPos
2972
2973  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2974                                         AMDILCFGStructurizer *passRep) {
2975    return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
2976  } //insertInstrBefore
2977
2978  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
2979                                         AMDILCFGStructurizer *passRep, DebugLoc DL) {
2980    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
2981    MachineInstr *newInstr =
2982      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
2983
2984    MachineBasicBlock::iterator res;
2985    if (blk->begin() != blk->end()) {
2986      blk->insert(blk->begin(), newInstr);
2987    } else {
2988      blk->push_back(newInstr);
2989    }
2990
2991    SHOWNEWINSTR(newInstr);
2992
2993    return newInstr;
2994  } //insertInstrBefore
2995
2996  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
2997                             AMDILCFGStructurizer *passRep) {
2998    insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
2999  } //insertInstrEnd
3000
3001  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
3002                             AMDILCFGStructurizer *passRep, DebugLoc DL) {
3003    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3004   MachineInstr *newInstr = blk->getParent()
3005      ->CreateMachineInstr(tii->get(newOpcode), DL);
3006
3007    blk->push_back(newInstr);
3008    //assume the instruction doesn't take any reg operand ...
3009
3010    SHOWNEWINSTR(newInstr);
3011  } //insertInstrEnd
3012
3013  static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
3014                                         int newOpcode,
3015                                         AMDILCFGStructurizer *passRep) {
3016    MachineInstr *oldInstr = &(*instrPos);
3017    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3018    MachineBasicBlock *blk = oldInstr->getParent();
3019    MachineInstr *newInstr =
3020      blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
3021                                           DebugLoc());
3022
3023    blk->insert(instrPos, newInstr);
3024    //assume the instruction doesn't take any reg operand ...
3025
3026    SHOWNEWINSTR(newInstr);
3027    return newInstr;
3028  } //insertInstrBefore
3029
3030  static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
3031                                     int newOpcode,
3032                                     AMDILCFGStructurizer *passRep,
3033									 DebugLoc DL) {
3034    MachineInstr *oldInstr = &(*instrPos);
3035    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3036    MachineBasicBlock *blk = oldInstr->getParent();
3037    MachineInstr *newInstr =
3038      blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
3039                                           DL);
3040
3041    blk->insert(instrPos, newInstr);
3042    MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
3043                                         false);
3044
3045    SHOWNEWINSTR(newInstr);
3046    //erase later oldInstr->eraseFromParent();
3047  } //insertCondBranchBefore
3048
3049  static void insertCondBranchBefore(MachineBasicBlock *blk,
3050                                     MachineBasicBlock::iterator insertPos,
3051                                     int newOpcode,
3052                                     AMDILCFGStructurizer *passRep,
3053                                     RegiT regNum,
3054									 DebugLoc DL) {
3055    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3056
3057    MachineInstr *newInstr =
3058      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
3059
3060    //insert before
3061    blk->insert(insertPos, newInstr);
3062    MachineInstrBuilder(newInstr).addReg(regNum, false);
3063
3064    SHOWNEWINSTR(newInstr);
3065  } //insertCondBranchBefore
3066
3067  static void insertCondBranchEnd(MachineBasicBlock *blk,
3068                                  int newOpcode,
3069                                  AMDILCFGStructurizer *passRep,
3070                                  RegiT regNum) {
3071    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
3072    MachineInstr *newInstr =
3073      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
3074
3075    blk->push_back(newInstr);
3076    MachineInstrBuilder(newInstr).addReg(regNum, false);
3077
3078    SHOWNEWINSTR(newInstr);
3079  } //insertCondBranchEnd
3080
3081
3082  static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
3083                                      AMDILCFGStructurizer *passRep,
3084                                      RegiT regNum, int regVal) {
3085    MachineInstr *oldInstr = &(*instrPos);
3086    const AMDILInstrInfo *tii =
3087             static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
3088    MachineBasicBlock *blk = oldInstr->getParent();
3089    MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3090                                                 regVal);
3091    blk->insert(instrPos, newInstr);
3092
3093    SHOWNEWINSTR(newInstr);
3094  } //insertAssignInstrBefore
3095
3096  static void insertAssignInstrBefore(MachineBasicBlock *blk,
3097                                      AMDILCFGStructurizer *passRep,
3098                                      RegiT regNum, int regVal) {
3099    const AMDILInstrInfo *tii =
3100             static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
3101
3102    MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
3103                                                 regVal);
3104    if (blk->begin() != blk->end()) {
3105      blk->insert(blk->begin(), newInstr);
3106    } else {
3107      blk->push_back(newInstr);
3108    }
3109
3110    SHOWNEWINSTR(newInstr);
3111
3112  } //insertInstrBefore
3113
3114  static void insertCompareInstrBefore(MachineBasicBlock *blk,
3115                                       MachineBasicBlock::iterator instrPos,
3116                                       AMDILCFGStructurizer *passRep,
3117                                       RegiT dstReg, RegiT src1Reg,
3118                                       RegiT src2Reg) {
3119    const AMDILInstrInfo *tii =
3120             static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
3121    MachineInstr *newInstr =
3122      blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
3123
3124    MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
3125    MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
3126    MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
3127
3128    blk->insert(instrPos, newInstr);
3129    SHOWNEWINSTR(newInstr);
3130
3131  } //insertCompareInstrBefore
3132
3133  static void cloneSuccessorList(MachineBasicBlock *dstBlk,
3134                                 MachineBasicBlock *srcBlk) {
3135    for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
3136         iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
3137      dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of
3138    }
3139  } //cloneSuccessorList
3140
3141  static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
3142    MachineFunction *func = srcBlk->getParent();
3143    MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
3144    func->push_back(newBlk);  //insert to function
3145    //newBlk->setNumber(srcBlk->getNumber());
3146    for (MachineBasicBlock::iterator iter = srcBlk->begin(),
3147         iterEnd = srcBlk->end();
3148         iter != iterEnd; ++iter) {
3149      MachineInstr *instr = func->CloneMachineInstr(iter);
3150      newBlk->push_back(instr);
3151    }
3152    return newBlk;
3153  }
3154
3155  //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
3156  //the AMDIL instruction is not recognized as terminator fix this and retire
3157  //this routine
3158  static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
3159                                         MachineBasicBlock *oldBlk,
3160                                         MachineBasicBlock *newBlk) {
3161    MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
3162    if (branchInstr && isCondBranch(branchInstr) &&
3163        getExplicitBranch(branchInstr) == oldBlk) {
3164      setExplicitBranch(branchInstr, newBlk);
3165    }
3166  }
3167
3168  static void wrapup(MachineBasicBlock *entryBlk) {
3169    assert((!entryBlk->getParent()->getJumpTableInfo()
3170            || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
3171           && "found a jump table");
3172
3173     //collect continue right before endloop
3174     SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
3175     MachineBasicBlock::iterator pre = entryBlk->begin();
3176     MachineBasicBlock::iterator iterEnd = entryBlk->end();
3177     MachineBasicBlock::iterator iter = pre;
3178     while (iter != iterEnd) {
3179       if (pre->getOpcode() == AMDGPU::CONTINUE
3180           && iter->getOpcode() == AMDGPU::ENDLOOP) {
3181         contInstr.push_back(pre);
3182       }
3183       pre = iter;
3184       ++iter;
3185     } //end while
3186
3187     //delete continue right before endloop
3188     for (unsigned i = 0; i < contInstr.size(); ++i) {
3189        contInstr[i]->eraseFromParent();
3190     }
3191
3192     // TODO to fix up jump table so later phase won't be confused.  if
3193     // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
3194     // there isn't such an interface yet.  alternatively, replace all the other
3195     // blocks in the jump table with the entryBlk //}
3196
3197  } //wrapup
3198
3199  static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) {
3200    return &pass.getAnalysis<MachineDominatorTree>();
3201  }
3202
3203  static MachinePostDominatorTree*
3204  getPostDominatorTree(AMDILCFGStructurizer &pass) {
3205    return &pass.getAnalysis<MachinePostDominatorTree>();
3206  }
3207
3208  static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) {
3209    return &pass.getAnalysis<MachineLoopInfo>();
3210  }
3211}; // template class CFGStructTraits
3212} //end of namespace llvm
3213
3214// createAMDILCFGPreparationPass- Returns a pass
3215FunctionPass *llvm::createAMDILCFGPreparationPass(TargetMachine &tm
3216                                                  AMDIL_OPT_LEVEL_DECL) {
3217  return new AMDILCFGPrepare(tm  AMDIL_OPT_LEVEL_VAR);
3218}
3219
3220bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func) {
3221  return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().prepare(func,
3222                                                                        *this,
3223                                                                        TRI);
3224}
3225
3226// createAMDILCFGStructurizerPass- Returns a pass
3227FunctionPass *llvm::createAMDILCFGStructurizerPass(TargetMachine &tm
3228                                                   AMDIL_OPT_LEVEL_DECL) {
3229  return new AMDILCFGPerform(tm  AMDIL_OPT_LEVEL_VAR);
3230}
3231
3232bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func) {
3233  return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().run(func,
3234                                                                    *this,
3235                                                                    TRI);
3236}
3237
3238//end of file newline goes below
3239
3240