AMDILCFGStructurizer.cpp revision 3a0187b1b53eca3143286a5ae7917cd71117b902
1//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//==-----------------------------------------------------------------------===// 9 10#define DEBUGME 0 11#define DEBUG_TYPE "structcfg" 12 13#include "AMDIL.h" 14#include "AMDILInstrInfo.h" 15#include "AMDILUtilityFunctions.h" 16#include "llvm/ADT/SCCIterator.h" 17#include "llvm/ADT/SmallVector.h" 18#include "llvm/ADT/Statistic.h" 19#include "llvm/Analysis/DominatorInternals.h" 20#include "llvm/Analysis/Dominators.h" 21#include "llvm/CodeGen/MachineDominators.h" 22#include "llvm/CodeGen/MachineDominators.h" 23#include "llvm/CodeGen/MachineFunction.h" 24#include "llvm/CodeGen/MachineFunctionAnalysis.h" 25#include "llvm/CodeGen/MachineFunctionPass.h" 26#include "llvm/CodeGen/MachineFunctionPass.h" 27#include "llvm/CodeGen/MachineInstrBuilder.h" 28#include "llvm/CodeGen/MachineJumpTableInfo.h" 29#include "llvm/CodeGen/MachineLoopInfo.h" 30#include "llvm/CodeGen/MachineRegisterInfo.h" 31#include "llvm/Target/TargetInstrInfo.h" 32 33#define FirstNonDebugInstr(A) A->begin() 34using namespace llvm; 35 36// TODO: move-begin. 37 38//===----------------------------------------------------------------------===// 39// 40// Statistics for CFGStructurizer. 41// 42//===----------------------------------------------------------------------===// 43 44STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " 45 "matched"); 46STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " 47 "matched"); 48STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break " 49 "pattern matched"); 50STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " 51 "pattern matched"); 52STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern " 53 "matched"); 54STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); 55STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); 56 57//===----------------------------------------------------------------------===// 58// 59// Miscellaneous utility for CFGStructurizer. 60// 61//===----------------------------------------------------------------------===// 62namespace llvmCFGStruct 63{ 64#define SHOWNEWINSTR(i) \ 65 if (DEBUGME) errs() << "New instr: " << *i << "\n" 66 67#define SHOWNEWBLK(b, msg) \ 68if (DEBUGME) { \ 69 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 70 errs() << "\n"; \ 71} 72 73#define SHOWBLK_DETAIL(b, msg) \ 74if (DEBUGME) { \ 75 if (b) { \ 76 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 77 b->print(errs()); \ 78 errs() << "\n"; \ 79 } \ 80} 81 82#define INVALIDSCCNUM -1 83#define INVALIDREGNUM 0 84 85template<class LoopinfoT> 86void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) { 87 for (typename LoopinfoT::iterator iter = LoopInfo.begin(), 88 iterEnd = LoopInfo.end(); 89 iter != iterEnd; ++iter) { 90 (*iter)->print(OS, 0); 91 } 92} 93 94template<class NodeT> 95void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) { 96 size_t sz = Src.size(); 97 for (size_t i = 0; i < sz/2; ++i) { 98 NodeT *t = Src[i]; 99 Src[i] = Src[sz - i - 1]; 100 Src[sz - i - 1] = t; 101 } 102} 103 104} //end namespace llvmCFGStruct 105 106 107//===----------------------------------------------------------------------===// 108// 109// MachinePostDominatorTree 110// 111//===----------------------------------------------------------------------===// 112 113namespace llvm { 114 115/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used 116/// to compute the a post-dominator tree. 117/// 118struct MachinePostDominatorTree : public MachineFunctionPass { 119 static char ID; // Pass identification, replacement for typeid 120 DominatorTreeBase<MachineBasicBlock> *DT; 121 MachinePostDominatorTree() : MachineFunctionPass(ID) 122 { 123 DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate 124 // postdominator 125 } 126 127 ~MachinePostDominatorTree(); 128 129 virtual bool runOnMachineFunction(MachineFunction &MF); 130 131 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 132 AU.setPreservesAll(); 133 MachineFunctionPass::getAnalysisUsage(AU); 134 } 135 136 inline const std::vector<MachineBasicBlock *> &getRoots() const { 137 return DT->getRoots(); 138 } 139 140 inline MachineDomTreeNode *getRootNode() const { 141 return DT->getRootNode(); 142 } 143 144 inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { 145 return DT->getNode(BB); 146 } 147 148 inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { 149 return DT->getNode(BB); 150 } 151 152 inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const { 153 return DT->dominates(A, B); 154 } 155 156 inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const { 157 return DT->dominates(A, B); 158 } 159 160 inline bool 161 properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const { 162 return DT->properlyDominates(A, B); 163 } 164 165 inline bool 166 properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const { 167 return DT->properlyDominates(A, B); 168 } 169 170 inline MachineBasicBlock * 171 findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) { 172 return DT->findNearestCommonDominator(A, B); 173 } 174 175 virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const { 176 DT->print(OS); 177 } 178}; 179} //end of namespace llvm 180 181char MachinePostDominatorTree::ID = 0; 182static RegisterPass<MachinePostDominatorTree> 183machinePostDominatorTreePass("machinepostdomtree", 184 "MachinePostDominator Tree Construction", 185 true, true); 186 187//const PassInfo *const llvm::MachinePostDominatorsID 188//= &machinePostDominatorTreePass; 189 190bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { 191 DT->recalculate(F); 192 //DEBUG(DT->dump()); 193 return false; 194} 195 196MachinePostDominatorTree::~MachinePostDominatorTree() { 197 delete DT; 198} 199 200//===----------------------------------------------------------------------===// 201// 202// supporting data structure for CFGStructurizer 203// 204//===----------------------------------------------------------------------===// 205 206namespace llvmCFGStruct 207{ 208template<class PassT> 209struct CFGStructTraits { 210}; 211 212template <class InstrT> 213class BlockInformation { 214public: 215 bool isRetired; 216 int sccNum; 217 //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr; 218 //Instructions defining the corresponding successor. 219 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {} 220}; 221 222template <class BlockT, class InstrT, class RegiT> 223class LandInformation { 224public: 225 BlockT *landBlk; 226 std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before 227 //WHILELOOP(thisloop) init before entering 228 //thisloop. 229 std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after 230 //WHILELOOP(thisloop) init after entering 231 //thisloop. 232 std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop 233 //land block, branch cond on this reg. 234 std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break 235 //endif" after ENDLOOP(thisloop) break 236 //outerLoopOf(thisLoop). 237 std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue 238 //endif" after ENDLOOP(thisloop) continue on 239 //outerLoopOf(thisLoop). 240 LandInformation() : landBlk(NULL) {} 241}; 242 243} //end of namespace llvmCFGStruct 244 245//===----------------------------------------------------------------------===// 246// 247// CFGStructurizer 248// 249//===----------------------------------------------------------------------===// 250 251namespace llvmCFGStruct 252{ 253// bixia TODO: port it to BasicBlock, not just MachineBasicBlock. 254template<class PassT> 255class CFGStructurizer 256{ 257public: 258 typedef enum { 259 Not_SinglePath = 0, 260 SinglePath_InPath = 1, 261 SinglePath_NotInPath = 2 262 } PathToKind; 263 264public: 265 typedef typename PassT::InstructionType InstrT; 266 typedef typename PassT::FunctionType FuncT; 267 typedef typename PassT::DominatortreeType DomTreeT; 268 typedef typename PassT::PostDominatortreeType PostDomTreeT; 269 typedef typename PassT::DomTreeNodeType DomTreeNodeT; 270 typedef typename PassT::LoopinfoType LoopInfoT; 271 272 typedef GraphTraits<FuncT *> FuncGTraits; 273 //typedef FuncGTraits::nodes_iterator BlockIterator; 274 typedef typename FuncT::iterator BlockIterator; 275 276 typedef typename FuncGTraits::NodeType BlockT; 277 typedef GraphTraits<BlockT *> BlockGTraits; 278 typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits; 279 //typedef BlockGTraits::succ_iterator InstructionIterator; 280 typedef typename BlockT::iterator InstrIterator; 281 282 typedef CFGStructTraits<PassT> CFGTraits; 283 typedef BlockInformation<InstrT> BlockInfo; 284 typedef std::map<BlockT *, BlockInfo *> BlockInfoMap; 285 286 typedef int RegiT; 287 typedef typename PassT::LoopType LoopT; 288 typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo; 289 typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap; 290 //landing info for loop break 291 typedef SmallVector<BlockT *, 32> BlockTSmallerVector; 292 293public: 294 CFGStructurizer(); 295 ~CFGStructurizer(); 296 297 /// Perform the CFG structurization 298 bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); 299 300 /// Perform the CFG preparation 301 bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); 302 303private: 304 void orderBlocks(); 305 void printOrderedBlocks(llvm::raw_ostream &OS); 306 int patternMatch(BlockT *CurBlock); 307 int patternMatchGroup(BlockT *CurBlock); 308 309 int serialPatternMatch(BlockT *CurBlock); 310 int ifPatternMatch(BlockT *CurBlock); 311 int switchPatternMatch(BlockT *CurBlock); 312 int loopendPatternMatch(BlockT *CurBlock); 313 int loopPatternMatch(BlockT *CurBlock); 314 315 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); 316 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); 317 //int loopWithoutBreak(BlockT *); 318 319 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop, 320 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock); 321 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop, 322 BlockT *ContBlock, LoopT *contLoop); 323 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block); 324 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, 325 BlockT *FalseBlock); 326 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock, 327 BlockT *FalseBlock); 328 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, 329 BlockT *FalseBlock, BlockT **LandBlockPtr); 330 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, 331 BlockT *FalseBlock, BlockT *LandBlock, 332 bool Detail = false); 333 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock, 334 bool AllowSideEntry = true); 335 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock, 336 bool AllowSideEntry = true); 337 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock); 338 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock); 339 340 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock, 341 BlockT *TrueBlock, BlockT *FalseBlock, 342 BlockT *LandBlock); 343 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand); 344 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock, 345 BlockT *ExitLandBlock, RegiT SetReg); 346 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock, 347 RegiT SetReg); 348 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep, 349 std::set<BlockT*> &ExitBlockSet, 350 BlockT *ExitLandBlk); 351 BlockT *addLoopEndbranchBlock(LoopT *LoopRep, 352 BlockTSmallerVector &ExitingBlocks, 353 BlockTSmallerVector &ExitBlocks); 354 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep); 355 void removeUnconditionalBranch(BlockT *SrcBlock); 356 void removeRedundantConditionalBranch(BlockT *SrcBlock); 357 void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks); 358 359 void removeSuccessor(BlockT *SrcBlock); 360 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock); 361 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock); 362 363 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock, 364 InstrIterator InsertPos); 365 366 void recordSccnum(BlockT *SrcBlock, int SCCNum); 367 int getSCCNum(BlockT *srcBlk); 368 369 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock); 370 bool isRetiredBlock(BlockT *SrcBlock); 371 bool isActiveLoophead(BlockT *CurBlock); 372 bool needMigrateBlock(BlockT *Block); 373 374 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock, 375 BlockTSmallerVector &exitBlocks, 376 std::set<BlockT*> &ExitBlockSet); 377 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL); 378 BlockT *getLoopLandBlock(LoopT *LoopRep); 379 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep); 380 381 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum); 382 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum); 383 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum); 384 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum); 385 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum); 386 387 bool hasBackEdge(BlockT *curBlock); 388 unsigned getLoopDepth (LoopT *LoopRep); 389 int countActiveBlock( 390 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart, 391 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd); 392 BlockT *findNearestCommonPostDom(std::set<BlockT *>&); 393 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2); 394 395private: 396 DomTreeT *domTree; 397 PostDomTreeT *postDomTree; 398 LoopInfoT *loopInfo; 399 PassT *passRep; 400 FuncT *funcRep; 401 402 BlockInfoMap blockInfoMap; 403 LoopLandInfoMap loopLandInfoMap; 404 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks; 405 const AMDGPURegisterInfo *TRI; 406 407}; //template class CFGStructurizer 408 409template<class PassT> CFGStructurizer<PassT>::CFGStructurizer() 410 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) { 411} 412 413template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() { 414 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(), 415 E = blockInfoMap.end(); I != E; ++I) { 416 delete I->second; 417 } 418} 419 420template<class PassT> 421bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass, 422 const AMDGPURegisterInfo * tri) { 423 passRep = &pass; 424 funcRep = &func; 425 TRI = tri; 426 427 bool changed = false; 428 //func.RenumberBlocks(); 429 430 //to do, if not reducible flow graph, make it so ??? 431 432 if (DEBUGME) { 433 errs() << "AMDILCFGStructurizer::prepare\n"; 434 //func.viewCFG(); 435 //func.viewCFGOnly(); 436 //func.dump(); 437 } 438 439 //FIXME: gcc complains on this. 440 //domTree = &pass.getAnalysis<DomTreeT>(); 441 //domTree = CFGTraits::getDominatorTree(pass); 442 //if (DEBUGME) { 443 // domTree->print(errs()); 444 //} 445 446 //FIXME: gcc complains on this. 447 //domTree = &pass.getAnalysis<DomTreeT>(); 448 //postDomTree = CFGTraits::getPostDominatorTree(pass); 449 //if (DEBUGME) { 450 // postDomTree->print(errs()); 451 //} 452 453 //FIXME: gcc complains on this. 454 //loopInfo = &pass.getAnalysis<LoopInfoT>(); 455 loopInfo = CFGTraits::getLoopInfo(pass); 456 if (DEBUGME) { 457 errs() << "LoopInfo:\n"; 458 PrintLoopinfo(*loopInfo, errs()); 459 } 460 461 orderBlocks(); 462 if (DEBUGME) { 463 errs() << "Ordered blocks:\n"; 464 printOrderedBlocks(errs()); 465 } 466 467 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks; 468 469 for (typename LoopInfoT::iterator iter = loopInfo->begin(), 470 iterEnd = loopInfo->end(); 471 iter != iterEnd; ++iter) { 472 LoopT* loopRep = (*iter); 473 BlockTSmallerVector exitingBlks; 474 loopRep->getExitingBlocks(exitingBlks); 475 476 if (exitingBlks.size() == 0) { 477 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep); 478 if (dummyExitBlk != NULL) 479 retBlks.push_back(dummyExitBlk); 480 } 481 } 482 483 // Remove unconditional branch instr. 484 // Add dummy exit block iff there are multiple returns. 485 486 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 487 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end(); 488 iterBlk != iterEndBlk; 489 ++iterBlk) { 490 BlockT *curBlk = *iterBlk; 491 removeUnconditionalBranch(curBlk); 492 removeRedundantConditionalBranch(curBlk); 493 if (CFGTraits::isReturnBlock(curBlk)) { 494 retBlks.push_back(curBlk); 495 } 496 assert(curBlk->succ_size() <= 2); 497 //assert(curBlk->size() > 0); 498 //removeEmptyBlock(curBlk) ?? 499 } //for 500 501 if (retBlks.size() >= 2) { 502 addDummyExitBlock(retBlks); 503 changed = true; 504 } 505 506 return changed; 507} //CFGStructurizer::prepare 508 509template<class PassT> 510bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass, 511 const AMDGPURegisterInfo * tri) { 512 passRep = &pass; 513 funcRep = &func; 514 TRI = tri; 515 516 //func.RenumberBlocks(); 517 518 //Assume reducible CFG... 519 if (DEBUGME) { 520 errs() << "AMDILCFGStructurizer::run\n"; 521 //errs() << func.getFunction()->getNameStr() << "\n"; 522 func.viewCFG(); 523 //func.viewCFGOnly(); 524 //func.dump(); 525 } 526 527#if 1 528 //FIXME: gcc complains on this. 529 //domTree = &pass.getAnalysis<DomTreeT>(); 530 domTree = CFGTraits::getDominatorTree(pass); 531 if (DEBUGME) { 532 domTree->print(errs(), (const llvm::Module*)0); 533 } 534#endif 535 536 //FIXME: gcc complains on this. 537 //domTree = &pass.getAnalysis<DomTreeT>(); 538 postDomTree = CFGTraits::getPostDominatorTree(pass); 539 if (DEBUGME) { 540 postDomTree->print(errs()); 541 } 542 543 //FIXME: gcc complains on this. 544 //loopInfo = &pass.getAnalysis<LoopInfoT>(); 545 loopInfo = CFGTraits::getLoopInfo(pass); 546 if (DEBUGME) { 547 errs() << "LoopInfo:\n"; 548 PrintLoopinfo(*loopInfo, errs()); 549 } 550 551 orderBlocks(); 552//#define STRESSTEST 553#ifdef STRESSTEST 554 //Use the worse block ordering to test the algorithm. 555 ReverseVector(orderedBlks); 556#endif 557 558 if (DEBUGME) { 559 errs() << "Ordered blocks:\n"; 560 printOrderedBlocks(errs()); 561 } 562 int numIter = 0; 563 bool finish = false; 564 BlockT *curBlk; 565 bool makeProgress = false; 566 int numRemainedBlk = countActiveBlock(orderedBlks.begin(), 567 orderedBlks.end()); 568 569 do { 570 ++numIter; 571 if (DEBUGME) { 572 errs() << "numIter = " << numIter 573 << ", numRemaintedBlk = " << numRemainedBlk << "\n"; 574 } 575 576 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 577 iterBlk = orderedBlks.begin(); 578 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 579 iterBlkEnd = orderedBlks.end(); 580 581 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 582 sccBeginIter = iterBlk; 583 BlockT *sccBeginBlk = NULL; 584 int sccNumBlk = 0; // The number of active blocks, init to a 585 // maximum possible number. 586 int sccNumIter; // Number of iteration in this SCC. 587 588 while (iterBlk != iterBlkEnd) { 589 curBlk = *iterBlk; 590 591 if (sccBeginBlk == NULL) { 592 sccBeginIter = iterBlk; 593 sccBeginBlk = curBlk; 594 sccNumIter = 0; 595 sccNumBlk = numRemainedBlk; // Init to maximum possible number. 596 if (DEBUGME) { 597 errs() << "start processing SCC" << getSCCNum(sccBeginBlk); 598 errs() << "\n"; 599 } 600 } 601 602 if (!isRetiredBlock(curBlk)) { 603 patternMatch(curBlk); 604 } 605 606 ++iterBlk; 607 608 bool contNextScc = true; 609 if (iterBlk == iterBlkEnd 610 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) { 611 // Just finish one scc. 612 ++sccNumIter; 613 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk); 614 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { 615 if (DEBUGME) { 616 errs() << "Can't reduce SCC " << getSCCNum(curBlk) 617 << ", sccNumIter = " << sccNumIter; 618 errs() << "doesn't make any progress\n"; 619 } 620 contNextScc = true; 621 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { 622 sccNumBlk = sccRemainedNumBlk; 623 iterBlk = sccBeginIter; 624 contNextScc = false; 625 if (DEBUGME) { 626 errs() << "repeat processing SCC" << getSCCNum(curBlk) 627 << "sccNumIter = " << sccNumIter << "\n"; 628 func.viewCFG(); 629 //func.viewCFGOnly(); 630 } 631 } else { 632 // Finish the current scc. 633 contNextScc = true; 634 } 635 } else { 636 // Continue on next component in the current scc. 637 contNextScc = false; 638 } 639 640 if (contNextScc) { 641 sccBeginBlk = NULL; 642 } 643 } //while, "one iteration" over the function. 644 645 BlockT *entryBlk = FuncGTraits::nodes_begin(&func); 646 if (entryBlk->succ_size() == 0) { 647 finish = true; 648 if (DEBUGME) { 649 errs() << "Reduce to one block\n"; 650 } 651 } else { 652 int newnumRemainedBlk 653 = countActiveBlock(orderedBlks.begin(), orderedBlks.end()); 654 // consider cloned blocks ?? 655 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) { 656 makeProgress = true; 657 numRemainedBlk = newnumRemainedBlk; 658 } else { 659 makeProgress = false; 660 if (DEBUGME) { 661 errs() << "No progress\n"; 662 } 663 } 664 } 665 } while (!finish && makeProgress); 666 667 // Misc wrap up to maintain the consistency of the Function representation. 668 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func)); 669 670 // Detach retired Block, release memory. 671 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(), 672 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) { 673 if ((*iterMap).second && (*iterMap).second->isRetired) { 674 assert(((*iterMap).first)->getNumber() != -1); 675 if (DEBUGME) { 676 errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; 677 } 678 (*iterMap).first->eraseFromParent(); //Remove from the parent Function. 679 } 680 delete (*iterMap).second; 681 } 682 blockInfoMap.clear(); 683 684 // clear loopLandInfoMap 685 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(), 686 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) { 687 delete (*iterMap).second; 688 } 689 loopLandInfoMap.clear(); 690 691 if (DEBUGME) { 692 func.viewCFG(); 693 //func.dump(); 694 } 695 696 if (!finish) { 697 assert(!"IRREDUCIBL_CF"); 698 } 699 700 return true; 701} //CFGStructurizer::run 702 703/// Print the ordered Blocks. 704/// 705template<class PassT> 706void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) { 707 size_t i = 0; 708 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 709 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); 710 iterBlk != iterBlkEnd; 711 ++iterBlk, ++i) { 712 os << "BB" << (*iterBlk)->getNumber(); 713 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; 714 if (i != 0 && i % 10 == 0) { 715 os << "\n"; 716 } else { 717 os << " "; 718 } 719 } 720} //printOrderedBlocks 721 722/// Compute the reversed DFS post order of Blocks 723/// 724template<class PassT> void CFGStructurizer<PassT>::orderBlocks() { 725 int sccNum = 0; 726 BlockT *bb; 727 for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep), 728 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) { 729 std::vector<BlockT *> &sccNext = *sccIter; 730 for (typename std::vector<BlockT *>::const_iterator 731 blockIter = sccNext.begin(), blockEnd = sccNext.end(); 732 blockIter != blockEnd; ++blockIter) { 733 bb = *blockIter; 734 orderedBlks.push_back(bb); 735 recordSccnum(bb, sccNum); 736 } 737 } 738 739 //walk through all the block in func to check for unreachable 740 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep), 741 blockEnd1 = FuncGTraits::nodes_end(funcRep); 742 blockIter1 != blockEnd1; ++blockIter1) { 743 BlockT *bb = &(*blockIter1); 744 sccNum = getSCCNum(bb); 745 if (sccNum == INVALIDSCCNUM) { 746 errs() << "unreachable block BB" << bb->getNumber() << "\n"; 747 } 748 } //end of for 749} //orderBlocks 750 751template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) { 752 int numMatch = 0; 753 int curMatch; 754 755 if (DEBUGME) { 756 errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; 757 } 758 759 while ((curMatch = patternMatchGroup(curBlk)) > 0) { 760 numMatch += curMatch; 761 } 762 763 if (DEBUGME) { 764 errs() << "End patternMatch BB" << curBlk->getNumber() 765 << ", numMatch = " << numMatch << "\n"; 766 } 767 768 return numMatch; 769} //patternMatch 770 771template<class PassT> 772int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) { 773 int numMatch = 0; 774 numMatch += serialPatternMatch(curBlk); 775 numMatch += ifPatternMatch(curBlk); 776 //numMatch += switchPatternMatch(curBlk); 777 numMatch += loopendPatternMatch(curBlk); 778 numMatch += loopPatternMatch(curBlk); 779 return numMatch; 780}//patternMatchGroup 781 782template<class PassT> 783int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) { 784 if (curBlk->succ_size() != 1) { 785 return 0; 786 } 787 788 BlockT *childBlk = *curBlk->succ_begin(); 789 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) { 790 return 0; 791 } 792 793 mergeSerialBlock(curBlk, childBlk); 794 ++numSerialPatternMatch; 795 return 1; 796} //serialPatternMatch 797 798template<class PassT> 799int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) { 800 //two edges 801 if (curBlk->succ_size() != 2) { 802 return 0; 803 } 804 805 if (hasBackEdge(curBlk)) { 806 return 0; 807 } 808 809 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk); 810 if (branchInstr == NULL) { 811 return 0; 812 } 813 814 assert(CFGTraits::isCondBranch(branchInstr)); 815 816 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr); 817 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr); 818 BlockT *landBlk; 819 int cloned = 0; 820 821 // TODO: Simplify 822 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1 823 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) { 824 landBlk = *trueBlk->succ_begin(); 825 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) { 826 landBlk = NULL; 827 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) { 828 landBlk = falseBlk; 829 falseBlk = NULL; 830 } else if (falseBlk->succ_size() == 1 831 && *falseBlk->succ_begin() == trueBlk) { 832 landBlk = trueBlk; 833 trueBlk = NULL; 834 } else if (falseBlk->succ_size() == 1 835 && isSameloopDetachedContbreak(trueBlk, falseBlk)) { 836 landBlk = *falseBlk->succ_begin(); 837 } else if (trueBlk->succ_size() == 1 838 && isSameloopDetachedContbreak(falseBlk, trueBlk)) { 839 landBlk = *trueBlk->succ_begin(); 840 } else { 841 return handleJumpintoIf(curBlk, trueBlk, falseBlk); 842 } 843 844 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the 845 // new BB created for landBlk==NULL may introduce new challenge to the 846 // reduction process. 847 if (landBlk != NULL && 848 ((trueBlk && trueBlk->pred_size() > 1) 849 || (falseBlk && falseBlk->pred_size() > 1))) { 850 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk); 851 } 852 853 if (trueBlk && trueBlk->pred_size() > 1) { 854 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk); 855 ++cloned; 856 } 857 858 if (falseBlk && falseBlk->pred_size() > 1) { 859 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk); 860 ++cloned; 861 } 862 863 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk); 864 865 ++numIfPatternMatch; 866 867 numClonedBlock += cloned; 868 869 return 1 + cloned; 870} //ifPatternMatch 871 872template<class PassT> 873int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) { 874 return 0; 875} //switchPatternMatch 876 877template<class PassT> 878int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) { 879 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 880 typename std::vector<LoopT *> nestedLoops; 881 while (loopRep) { 882 nestedLoops.push_back(loopRep); 883 loopRep = loopRep->getParentLoop(); 884 } 885 886 if (nestedLoops.size() == 0) { 887 return 0; 888 } 889 890 // Process nested loop outside->inside, so "continue" to a outside loop won't 891 // be mistaken as "break" of the current loop. 892 int num = 0; 893 for (typename std::vector<LoopT *>::reverse_iterator 894 iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend(); 895 iter != iterEnd; ++iter) { 896 loopRep = *iter; 897 898 if (getLoopLandBlock(loopRep) != NULL) { 899 continue; 900 } 901 902 BlockT *loopHeader = loopRep->getHeader(); 903 904 int numBreak = loopbreakPatternMatch(loopRep, loopHeader); 905 906 if (numBreak == -1) { 907 break; 908 } 909 910 int numCont = loopcontPatternMatch(loopRep, loopHeader); 911 num += numBreak + numCont; 912 } 913 914 return num; 915} //loopendPatternMatch 916 917template<class PassT> 918int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) { 919 if (curBlk->succ_size() != 0) { 920 return 0; 921 } 922 923 int numLoop = 0; 924 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 925 while (loopRep && loopRep->getHeader() == curBlk) { 926 LoopLandInfo *loopLand = getLoopLandInfo(loopRep); 927 if (loopLand) { 928 BlockT *landBlk = loopLand->landBlk; 929 assert(landBlk); 930 if (!isRetiredBlock(landBlk)) { 931 mergeLooplandBlock(curBlk, loopLand); 932 ++numLoop; 933 } 934 } 935 loopRep = loopRep->getParentLoop(); 936 } 937 938 numLoopPatternMatch += numLoop; 939 940 return numLoop; 941} //loopPatternMatch 942 943template<class PassT> 944int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep, 945 BlockT *loopHeader) { 946 BlockTSmallerVector exitingBlks; 947 loopRep->getExitingBlocks(exitingBlks); 948 949 if (DEBUGME) { 950 errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; 951 } 952 953 if (exitingBlks.size() == 0) { 954 setLoopLandBlock(loopRep); 955 return 0; 956 } 957 958 // Compute the corresponding exitBlks and exit block set. 959 BlockTSmallerVector exitBlks; 960 std::set<BlockT *> exitBlkSet; 961 for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(), 962 iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) { 963 BlockT *exitingBlk = *iter; 964 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); 965 exitBlks.push_back(exitBlk); 966 exitBlkSet.insert(exitBlk); //non-duplicate insert 967 } 968 969 assert(exitBlkSet.size() > 0); 970 assert(exitBlks.size() == exitingBlks.size()); 971 972 if (DEBUGME) { 973 errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; 974 } 975 976 // Find exitLandBlk. 977 BlockT *exitLandBlk = NULL; 978 int numCloned = 0; 979 int numSerial = 0; 980 981 if (exitBlkSet.size() == 1) 982 { 983 exitLandBlk = *exitBlkSet.begin(); 984 } else { 985 exitLandBlk = findNearestCommonPostDom(exitBlkSet); 986 987 if (exitLandBlk == NULL) { 988 return -1; 989 } 990 991 bool allInPath = true; 992 bool allNotInPath = true; 993 for (typename std::set<BlockT*>::const_iterator 994 iter = exitBlkSet.begin(), 995 iterEnd = exitBlkSet.end(); 996 iter != iterEnd; ++iter) { 997 BlockT *exitBlk = *iter; 998 999 PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true); 1000 if (DEBUGME) { 1001 errs() << "BB" << exitBlk->getNumber() 1002 << " to BB" << exitLandBlk->getNumber() << " PathToKind=" 1003 << pathKind << "\n"; 1004 } 1005 1006 allInPath = allInPath && (pathKind == SinglePath_InPath); 1007 allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath); 1008 1009 if (!allInPath && !allNotInPath) { 1010 if (DEBUGME) { 1011 errs() << "singlePath check fail\n"; 1012 } 1013 return -1; 1014 } 1015 } // check all exit blocks 1016 1017 if (allNotInPath) { 1018#if 1 1019 1020 // TODO: Simplify, maybe separate function? 1021 //funcRep->viewCFG(); 1022 LoopT *parentLoopRep = loopRep->getParentLoop(); 1023 BlockT *parentLoopHeader = NULL; 1024 if (parentLoopRep) 1025 parentLoopHeader = parentLoopRep->getHeader(); 1026 1027 if (exitLandBlk == parentLoopHeader && 1028 (exitLandBlk = relocateLoopcontBlock(parentLoopRep, 1029 loopRep, 1030 exitBlkSet, 1031 exitLandBlk)) != NULL) { 1032 if (DEBUGME) { 1033 errs() << "relocateLoopcontBlock success\n"; 1034 } 1035 } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep, 1036 exitingBlks, 1037 exitBlks)) != NULL) { 1038 if (DEBUGME) { 1039 errs() << "insertEndbranchBlock success\n"; 1040 } 1041 } else { 1042 if (DEBUGME) { 1043 errs() << "loop exit fail\n"; 1044 } 1045 return -1; 1046 } 1047#else 1048 return -1; 1049#endif 1050 } 1051 1052 // Handle side entry to exit path. 1053 exitBlks.clear(); 1054 exitBlkSet.clear(); 1055 for (typename BlockTSmallerVector::iterator iterExiting = 1056 exitingBlks.begin(), 1057 iterExitingEnd = exitingBlks.end(); 1058 iterExiting != iterExitingEnd; ++iterExiting) { 1059 BlockT *exitingBlk = *iterExiting; 1060 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); 1061 BlockT *newExitBlk = exitBlk; 1062 1063 if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) { 1064 newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk); 1065 ++numCloned; 1066 } 1067 1068 numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk); 1069 1070 exitBlks.push_back(newExitBlk); 1071 exitBlkSet.insert(newExitBlk); 1072 } 1073 1074 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), 1075 iterExitEnd = exitBlks.end(); 1076 iterExit != iterExitEnd; ++iterExit) { 1077 BlockT *exitBlk = *iterExit; 1078 numSerial += serialPatternMatch(exitBlk); 1079 } 1080 1081 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), 1082 iterExitEnd = exitBlks.end(); 1083 iterExit != iterExitEnd; ++iterExit) { 1084 BlockT *exitBlk = *iterExit; 1085 if (exitBlk->pred_size() > 1) { 1086 if (exitBlk != exitLandBlk) { 1087 return -1; 1088 } 1089 } else { 1090 if (exitBlk != exitLandBlk && 1091 (exitBlk->succ_size() != 1 || 1092 *exitBlk->succ_begin() != exitLandBlk)) { 1093 return -1; 1094 } 1095 } 1096 } 1097 } // else 1098 1099 // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk); 1100 exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet); 1101 1102 // Fold break into the breaking block. Leverage across level breaks. 1103 assert(exitingBlks.size() == exitBlks.size()); 1104 for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(), 1105 iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end(); 1106 iterExit != iterExitEnd; ++iterExit, ++iterExiting) { 1107 BlockT *exitBlk = *iterExit; 1108 BlockT *exitingBlk = *iterExiting; 1109 assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk); 1110 LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk); 1111 handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk); 1112 } 1113 1114 int numBreak = static_cast<int>(exitingBlks.size()); 1115 numLoopbreakPatternMatch += numBreak; 1116 numClonedBlock += numCloned; 1117 return numBreak + numSerial + numCloned; 1118} //loopbreakPatternMatch 1119 1120template<class PassT> 1121int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep, 1122 BlockT *loopHeader) { 1123 int numCont = 0; 1124 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk; 1125 for (typename InvBlockGTraits::ChildIteratorType iter = 1126 InvBlockGTraits::child_begin(loopHeader), 1127 iterEnd = InvBlockGTraits::child_end(loopHeader); 1128 iter != iterEnd; ++iter) { 1129 BlockT *curBlk = *iter; 1130 if (loopRep->contains(curBlk)) { 1131 handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk), 1132 loopHeader, loopRep); 1133 contBlk.push_back(curBlk); 1134 ++numCont; 1135 } 1136 } 1137 1138 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator 1139 iter = contBlk.begin(), iterEnd = contBlk.end(); 1140 iter != iterEnd; ++iter) { 1141 (*iter)->removeSuccessor(loopHeader); 1142 } 1143 1144 numLoopcontPatternMatch += numCont; 1145 1146 return numCont; 1147} //loopcontPatternMatch 1148 1149 1150template<class PassT> 1151bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk, 1152 BlockT *src2Blk) { 1153 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the 1154 // same loop with LoopLandInfo without explicitly keeping track of 1155 // loopContBlks and loopBreakBlks, this is a method to get the information. 1156 // 1157 if (src1Blk->succ_size() == 0) { 1158 LoopT *loopRep = loopInfo->getLoopFor(src1Blk); 1159 if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { 1160 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 1161 if (theEntry != NULL) { 1162 if (DEBUGME) { 1163 errs() << "isLoopContBreakBlock yes src1 = BB" 1164 << src1Blk->getNumber() 1165 << " src2 = BB" << src2Blk->getNumber() << "\n"; 1166 } 1167 return true; 1168 } 1169 } 1170 } 1171 return false; 1172} //isSameloopDetachedContbreak 1173 1174template<class PassT> 1175int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk, 1176 BlockT *trueBlk, 1177 BlockT *falseBlk) { 1178 int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); 1179 if (num == 0) { 1180 if (DEBUGME) { 1181 errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; 1182 } 1183 num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); 1184 } 1185 return num; 1186} 1187 1188template<class PassT> 1189int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk, 1190 BlockT *trueBlk, 1191 BlockT *falseBlk) { 1192 int num = 0; 1193 BlockT *downBlk; 1194 1195 //trueBlk could be the common post dominator 1196 downBlk = trueBlk; 1197 1198 if (DEBUGME) { 1199 errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() 1200 << " true = BB" << trueBlk->getNumber() 1201 << ", numSucc=" << trueBlk->succ_size() 1202 << " false = BB" << falseBlk->getNumber() << "\n"; 1203 } 1204 1205 while (downBlk) { 1206 if (DEBUGME) { 1207 errs() << "check down = BB" << downBlk->getNumber(); 1208 } 1209 1210 if (//postDomTree->dominates(downBlk, falseBlk) && 1211 singlePathTo(falseBlk, downBlk) == SinglePath_InPath) { 1212 if (DEBUGME) { 1213 errs() << " working\n"; 1214 } 1215 1216 num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk); 1217 num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk); 1218 1219 numClonedBlock += num; 1220 num += serialPatternMatch(*headBlk->succ_begin()); 1221 num += serialPatternMatch(*(++headBlk->succ_begin())); 1222 num += ifPatternMatch(headBlk); 1223 assert(num > 0); // 1224 1225 break; 1226 } 1227 if (DEBUGME) { 1228 errs() << " not working\n"; 1229 } 1230 downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL; 1231 } // walk down the postDomTree 1232 1233 return num; 1234} //handleJumpintoIf 1235 1236template<class PassT> 1237void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk, 1238 BlockT *trueBlk, 1239 BlockT *falseBlk, 1240 BlockT *landBlk, 1241 bool detail) { 1242 errs() << "head = BB" << headBlk->getNumber() 1243 << " size = " << headBlk->size(); 1244 if (detail) { 1245 errs() << "\n"; 1246 headBlk->print(errs()); 1247 errs() << "\n"; 1248 } 1249 1250 if (trueBlk) { 1251 errs() << ", true = BB" << trueBlk->getNumber() << " size = " 1252 << trueBlk->size() << " numPred = " << trueBlk->pred_size(); 1253 if (detail) { 1254 errs() << "\n"; 1255 trueBlk->print(errs()); 1256 errs() << "\n"; 1257 } 1258 } 1259 if (falseBlk) { 1260 errs() << ", false = BB" << falseBlk->getNumber() << " size = " 1261 << falseBlk->size() << " numPred = " << falseBlk->pred_size(); 1262 if (detail) { 1263 errs() << "\n"; 1264 falseBlk->print(errs()); 1265 errs() << "\n"; 1266 } 1267 } 1268 if (landBlk) { 1269 errs() << ", land = BB" << landBlk->getNumber() << " size = " 1270 << landBlk->size() << " numPred = " << landBlk->pred_size(); 1271 if (detail) { 1272 errs() << "\n"; 1273 landBlk->print(errs()); 1274 errs() << "\n"; 1275 } 1276 } 1277 1278 errs() << "\n"; 1279} //showImproveSimpleJumpintoIf 1280 1281template<class PassT> 1282int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk, 1283 BlockT *trueBlk, 1284 BlockT *falseBlk, 1285 BlockT **plandBlk) { 1286 bool migrateTrue = false; 1287 bool migrateFalse = false; 1288 1289 BlockT *landBlk = *plandBlk; 1290 1291 assert((trueBlk == NULL || trueBlk->succ_size() <= 1) 1292 && (falseBlk == NULL || falseBlk->succ_size() <= 1)); 1293 1294 if (trueBlk == falseBlk) { 1295 return 0; 1296 } 1297 1298#if 0 1299 if (DEBUGME) { 1300 errs() << "improveSimpleJumpintoIf: "; 1301 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); 1302 } 1303#endif 1304 1305 // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0; 1306 // May consider the # landBlk->pred_size() as it represents the number of 1307 // assignment initReg = .. needed to insert. 1308 migrateTrue = needMigrateBlock(trueBlk); 1309 migrateFalse = needMigrateBlock(falseBlk); 1310 1311 if (!migrateTrue && !migrateFalse) { 1312 return 0; 1313 } 1314 1315 // If we need to migrate either trueBlk and falseBlk, migrate the rest that 1316 // have more than one predecessors. without doing this, its predecessor 1317 // rather than headBlk will have undefined value in initReg. 1318 if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) { 1319 migrateTrue = true; 1320 } 1321 if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) { 1322 migrateFalse = true; 1323 } 1324 1325 if (DEBUGME) { 1326 errs() << "before improveSimpleJumpintoIf: "; 1327 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); 1328 //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1); 1329 } 1330 1331 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 1332 // 1333 // new: headBlk => if () {initReg = 1; org trueBlk branch} else 1334 // {initReg = 0; org falseBlk branch } 1335 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 1336 // => org landBlk 1337 // if landBlk->pred_size() > 2, put the about if-else inside 1338 // if (initReg !=2) {...} 1339 // 1340 // add initReg = initVal to headBlk 1341 1342 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1343 unsigned initReg = 1344 funcRep->getRegInfo().createVirtualRegister(I32RC); 1345 if (!migrateTrue || !migrateFalse) { 1346 int initVal = migrateTrue ? 0 : 1; 1347 CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal); 1348 } 1349 1350 int numNewBlk = 0; 1351 1352 if (landBlk == NULL) { 1353 landBlk = funcRep->CreateMachineBasicBlock(); 1354 funcRep->push_back(landBlk); //insert to function 1355 1356 if (trueBlk) { 1357 trueBlk->addSuccessor(landBlk); 1358 } else { 1359 headBlk->addSuccessor(landBlk); 1360 } 1361 1362 if (falseBlk) { 1363 falseBlk->addSuccessor(landBlk); 1364 } else { 1365 headBlk->addSuccessor(landBlk); 1366 } 1367 1368 numNewBlk ++; 1369 } 1370 1371 bool landBlkHasOtherPred = (landBlk->pred_size() > 2); 1372 1373 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" 1374 typename BlockT::iterator insertPos = 1375 CFGTraits::getInstrPos 1376 (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep)); 1377 1378 if (landBlkHasOtherPred) { 1379 unsigned immReg = 1380 funcRep->getRegInfo().createVirtualRegister(I32RC); 1381 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2); 1382 unsigned cmpResReg = 1383 funcRep->getRegInfo().createVirtualRegister(I32RC); 1384 1385 CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg, 1386 initReg, immReg); 1387 CFGTraits::insertCondBranchBefore(landBlk, insertPos, 1388 AMDGPU::IF_LOGICALZ_i32, passRep, 1389 cmpResReg, DebugLoc()); 1390 } 1391 1392 CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32, 1393 passRep, initReg, DebugLoc()); 1394 1395 if (migrateTrue) { 1396 migrateInstruction(trueBlk, landBlk, insertPos); 1397 // need to uncondionally insert the assignment to ensure a path from its 1398 // predecessor rather than headBlk has valid value in initReg if 1399 // (initVal != 1). 1400 CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1); 1401 } 1402 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep); 1403 1404 if (migrateFalse) { 1405 migrateInstruction(falseBlk, landBlk, insertPos); 1406 // need to uncondionally insert the assignment to ensure a path from its 1407 // predecessor rather than headBlk has valid value in initReg if 1408 // (initVal != 0) 1409 CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0); 1410 } 1411 //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); 1412 1413 if (landBlkHasOtherPred) { 1414 // add endif 1415 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); 1416 1417 // put initReg = 2 to other predecessors of landBlk 1418 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), 1419 predIterEnd = landBlk->pred_end(); predIter != predIterEnd; 1420 ++predIter) { 1421 BlockT *curBlk = *predIter; 1422 if (curBlk != trueBlk && curBlk != falseBlk) { 1423 CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2); 1424 } 1425 } //for 1426 } 1427 if (DEBUGME) { 1428 errs() << "result from improveSimpleJumpintoIf: "; 1429 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); 1430 //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1); 1431 } 1432 1433 // update landBlk 1434 *plandBlk = landBlk; 1435 1436 return numNewBlk; 1437} //improveSimpleJumpintoIf 1438 1439template<class PassT> 1440void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk, 1441 LoopT *exitingLoop, 1442 BlockT *exitBlk, 1443 LoopT *exitLoop, 1444 BlockT *landBlk) { 1445 if (DEBUGME) { 1446 errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) 1447 << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n"; 1448 } 1449 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1450 1451 RegiT initReg = INVALIDREGNUM; 1452 if (exitingLoop != exitLoop) { 1453 initReg = static_cast<int> 1454 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1455 assert(initReg != INVALIDREGNUM); 1456 addLoopBreakInitReg(exitLoop, initReg); 1457 while (exitingLoop != exitLoop && exitingLoop) { 1458 addLoopBreakOnReg(exitingLoop, initReg); 1459 exitingLoop = exitingLoop->getParentLoop(); 1460 } 1461 assert(exitingLoop == exitLoop); 1462 } 1463 1464 mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg); 1465 1466} //handleLoopbreak 1467 1468template<class PassT> 1469void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk, 1470 LoopT *contingLoop, 1471 BlockT *contBlk, 1472 LoopT *contLoop) { 1473 if (DEBUGME) { 1474 errs() << "loopcontPattern cont = BB" << contingBlk->getNumber() 1475 << " header = BB" << contBlk->getNumber() << "\n"; 1476 1477 errs() << "Trying to continue loop-depth = " 1478 << getLoopDepth(contLoop) 1479 << " from loop-depth = " << getLoopDepth(contingLoop) << "\n"; 1480 } 1481 1482 RegiT initReg = INVALIDREGNUM; 1483 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1484 if (contingLoop != contLoop) { 1485 initReg = static_cast<int> 1486 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1487 assert(initReg != INVALIDREGNUM); 1488 addLoopContInitReg(contLoop, initReg); 1489 while (contingLoop && contingLoop->getParentLoop() != contLoop) { 1490 addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg 1491 contingLoop = contingLoop->getParentLoop(); 1492 } 1493 assert(contingLoop && contingLoop->getParentLoop() == contLoop); 1494 addLoopContOnReg(contingLoop, initReg); 1495 } 1496 1497 settleLoopcontBlock(contingBlk, contBlk, initReg); 1498 //contingBlk->removeSuccessor(loopHeader); 1499} //handleLoopcontBlock 1500 1501template<class PassT> 1502void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) { 1503 if (DEBUGME) { 1504 errs() << "serialPattern BB" << dstBlk->getNumber() 1505 << " <= BB" << srcBlk->getNumber() << "\n"; 1506 } 1507 //removeUnconditionalBranch(dstBlk); 1508 dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end()); 1509 1510 dstBlk->removeSuccessor(srcBlk); 1511 CFGTraits::cloneSuccessorList(dstBlk, srcBlk); 1512 1513 removeSuccessor(srcBlk); 1514 retireBlock(dstBlk, srcBlk); 1515} //mergeSerialBlock 1516 1517template<class PassT> 1518void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr, 1519 BlockT *curBlk, 1520 BlockT *trueBlk, 1521 BlockT *falseBlk, 1522 BlockT *landBlk) { 1523 if (DEBUGME) { 1524 errs() << "ifPattern BB" << curBlk->getNumber(); 1525 errs() << "{ "; 1526 if (trueBlk) { 1527 errs() << "BB" << trueBlk->getNumber(); 1528 } 1529 errs() << " } else "; 1530 errs() << "{ "; 1531 if (falseBlk) { 1532 errs() << "BB" << falseBlk->getNumber(); 1533 } 1534 errs() << " }\n "; 1535 errs() << "landBlock: "; 1536 if (landBlk == NULL) { 1537 errs() << "NULL"; 1538 } else { 1539 errs() << "BB" << landBlk->getNumber(); 1540 } 1541 errs() << "\n"; 1542 } 1543 1544 int oldOpcode = branchInstr->getOpcode(); 1545 DebugLoc branchDL = branchInstr->getDebugLoc(); 1546 1547// transform to 1548// if cond 1549// trueBlk 1550// else 1551// falseBlk 1552// endif 1553// landBlk 1554 1555 typename BlockT::iterator branchInstrPos = 1556 CFGTraits::getInstrPos(curBlk, branchInstr); 1557 CFGTraits::insertCondBranchBefore(branchInstrPos, 1558 CFGTraits::getBranchNzeroOpcode(oldOpcode), 1559 passRep, 1560 branchDL); 1561 1562 if (trueBlk) { 1563 curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end()); 1564 curBlk->removeSuccessor(trueBlk); 1565 if (landBlk && trueBlk->succ_size()!=0) { 1566 trueBlk->removeSuccessor(landBlk); 1567 } 1568 retireBlock(curBlk, trueBlk); 1569 } 1570 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep); 1571 1572 if (falseBlk) { 1573 curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk), 1574 falseBlk->end()); 1575 curBlk->removeSuccessor(falseBlk); 1576 if (landBlk && falseBlk->succ_size() != 0) { 1577 falseBlk->removeSuccessor(landBlk); 1578 } 1579 retireBlock(curBlk, falseBlk); 1580 } 1581 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); 1582 1583 //curBlk->remove(branchInstrPos); 1584 branchInstr->eraseFromParent(); 1585 1586 if (landBlk && trueBlk && falseBlk) { 1587 curBlk->addSuccessor(landBlk); 1588 } 1589 1590} //mergeIfthenelseBlock 1591 1592template<class PassT> 1593void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk, 1594 LoopLandInfo *loopLand) { 1595 BlockT *landBlk = loopLand->landBlk; 1596 1597 if (DEBUGME) { 1598 errs() << "loopPattern header = BB" << dstBlk->getNumber() 1599 << " land = BB" << landBlk->getNumber() << "\n"; 1600 } 1601 1602 // Loop contInitRegs are init at the beginning of the loop. 1603 for (typename std::set<RegiT>::const_iterator iter = 1604 loopLand->contInitRegs.begin(), 1605 iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) { 1606 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); 1607 } 1608 1609 /* we last inserterd the DebugLoc in the 1610 * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk. 1611 * search for the DebugLoc in the that statement. 1612 * if not found, we have to insert the empty/default DebugLoc */ 1613 InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk); 1614 DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc(); 1615 1616 CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak); 1617 // Loop breakInitRegs are init before entering the loop. 1618 for (typename std::set<RegiT>::const_iterator iter = 1619 loopLand->breakInitRegs.begin(), 1620 iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) 1621 { 1622 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); 1623 } 1624 // Loop endbranchInitRegs are init before entering the loop. 1625 for (typename std::set<RegiT>::const_iterator iter = 1626 loopLand->endbranchInitRegs.begin(), 1627 iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) { 1628 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); 1629 } 1630 1631 /* we last inserterd the DebugLoc in the continue statement in the current dstBlk 1632 * search for the DebugLoc in the continue statement. 1633 * if not found, we have to insert the empty/default DebugLoc */ 1634 InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk); 1635 DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc(); 1636 1637 CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue); 1638 // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this 1639 // loop. 1640 for (typename std::set<RegiT>::const_iterator iter = 1641 loopLand->breakOnRegs.begin(), 1642 iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) { 1643 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep, 1644 *iter); 1645 } 1646 1647 // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this 1648 // loop. 1649 for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(), 1650 iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) { 1651 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32, 1652 passRep, *iter); 1653 } 1654 1655 dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end()); 1656 1657 for (typename BlockT::succ_iterator iter = landBlk->succ_begin(), 1658 iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) { 1659 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of. 1660 } 1661 1662 removeSuccessor(landBlk); 1663 retireBlock(dstBlk, landBlk); 1664} //mergeLooplandBlock 1665 1666template<class PassT> 1667void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk, 1668 BlockT *exitBlk, 1669 BlockT *exitLandBlk, 1670 RegiT setReg) { 1671 if (DEBUGME) { 1672 errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() 1673 << " exit = BB" << exitBlk->getNumber() 1674 << " land = BB" << exitLandBlk->getNumber() << "\n"; 1675 } 1676 1677 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); 1678 assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); 1679 1680 DebugLoc DL = branchInstr->getDebugLoc(); 1681 1682 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); 1683 int oldOpcode = branchInstr->getOpcode(); 1684 1685 // transform exitingBlk to 1686 // if ( ) { 1687 // exitBlk (if exitBlk != exitLandBlk) 1688 // setReg = 1 1689 // break 1690 // }endif 1691 // successor = {orgSuccessor(exitingBlk) - exitBlk} 1692 1693 typename BlockT::iterator branchInstrPos = 1694 CFGTraits::getInstrPos(exitingBlk, branchInstr); 1695 1696 if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { 1697 //break_logical 1698 int newOpcode = 1699 (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode) 1700 : CFGTraits::getBreakZeroOpcode(oldOpcode); 1701 CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); 1702 } else { 1703 int newOpcode = 1704 (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode) 1705 : CFGTraits::getBranchZeroOpcode(oldOpcode); 1706 CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); 1707 if (exitBlk != exitLandBlk) { 1708 //splice is insert-before ... 1709 exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(), 1710 exitBlk->end()); 1711 } 1712 if (setReg != INVALIDREGNUM) { 1713 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); 1714 } 1715 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep); 1716 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); 1717 } //if_logical 1718 1719 //now branchInst can be erase safely 1720 //exitingBlk->eraseFromParent(branchInstr); 1721 branchInstr->eraseFromParent(); 1722 1723 //now take care of successors, retire blocks 1724 exitingBlk->removeSuccessor(exitBlk); 1725 if (exitBlk != exitLandBlk) { 1726 //splice is insert-before ... 1727 exitBlk->removeSuccessor(exitLandBlk); 1728 retireBlock(exitingBlk, exitBlk); 1729 } 1730 1731} //mergeLoopbreakBlock 1732 1733template<class PassT> 1734void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk, 1735 BlockT *contBlk, 1736 RegiT setReg) { 1737 if (DEBUGME) { 1738 errs() << "settleLoopcontBlock conting = BB" 1739 << contingBlk->getNumber() 1740 << ", cont = BB" << contBlk->getNumber() << "\n"; 1741 } 1742 1743 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk); 1744 if (branchInstr) { 1745 assert(CFGTraits::isCondBranch(branchInstr)); 1746 typename BlockT::iterator branchInstrPos = 1747 CFGTraits::getInstrPos(contingBlk, branchInstr); 1748 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); 1749 int oldOpcode = branchInstr->getOpcode(); 1750 DebugLoc DL = branchInstr->getDebugLoc(); 1751 1752 // transform contingBlk to 1753 // if () { 1754 // move instr after branchInstr 1755 // continue 1756 // or 1757 // setReg = 1 1758 // break 1759 // }endif 1760 // successor = {orgSuccessor(contingBlk) - loopHeader} 1761 1762 bool useContinueLogical = 1763 (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr); 1764 1765 if (useContinueLogical == false) 1766 { 1767 int branchOpcode = 1768 trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode) 1769 : CFGTraits::getBranchZeroOpcode(oldOpcode); 1770 1771 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); 1772 1773 if (setReg != INVALIDREGNUM) { 1774 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); 1775 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1776 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL); 1777 } else { 1778 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1779 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL); 1780 } 1781 1782 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL); 1783 } else { 1784 int branchOpcode = 1785 trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode) 1786 : CFGTraits::getContinueZeroOpcode(oldOpcode); 1787 1788 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); 1789 } 1790 1791 //contingBlk->eraseFromParent(branchInstr); 1792 branchInstr->eraseFromParent(); 1793 } else { 1794 /* if we've arrived here then we've already erased the branch instruction 1795 * travel back up the basic block to see the last reference of our debug location 1796 * we've just inserted that reference here so it should be representative */ 1797 if (setReg != INVALIDREGNUM) { 1798 CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1); 1799 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1800 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); 1801 } else { 1802 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1803 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); 1804 } 1805 } //else 1806 1807} //settleLoopcontBlock 1808 1809// BBs in exitBlkSet are determined as in break-path for loopRep, 1810// before we can put code for BBs as inside loop-body for loopRep 1811// check whether those BBs are determined as cont-BB for parentLoopRep 1812// earlier. 1813// If so, generate a new BB newBlk 1814// (1) set newBlk common successor of BBs in exitBlkSet 1815// (2) change the continue-instr in BBs in exitBlkSet to break-instr 1816// (3) generate continue-instr in newBlk 1817// 1818template<class PassT> 1819typename CFGStructurizer<PassT>::BlockT * 1820CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep, 1821 LoopT *loopRep, 1822 std::set<BlockT *> &exitBlkSet, 1823 BlockT *exitLandBlk) { 1824 std::set<BlockT *> endBlkSet; 1825 1826// BlockT *parentLoopHead = parentLoopRep->getHeader(); 1827 1828 1829 for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(), 1830 iterEnd = exitBlkSet.end(); 1831 iter != iterEnd; ++iter) { 1832 BlockT *exitBlk = *iter; 1833 BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk); 1834 1835 if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL) 1836 return NULL; 1837 1838 endBlkSet.insert(endBlk); 1839 } 1840 1841 BlockT *newBlk = funcRep->CreateMachineBasicBlock(); 1842 funcRep->push_back(newBlk); //insert to function 1843 CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep); 1844 SHOWNEWBLK(newBlk, "New continue block: "); 1845 1846 for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(), 1847 iterEnd = endBlkSet.end(); 1848 iter != iterEnd; ++iter) { 1849 BlockT *endBlk = *iter; 1850 InstrT *contInstr = CFGTraits::getContinueInstr(endBlk); 1851 if (contInstr) { 1852 contInstr->eraseFromParent(); 1853 } 1854 endBlk->addSuccessor(newBlk); 1855 if (DEBUGME) { 1856 errs() << "Add new continue Block to BB" 1857 << endBlk->getNumber() << " successors\n"; 1858 } 1859 } 1860 1861 return newBlk; 1862} //relocateLoopcontBlock 1863 1864 1865// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as 1866// LoopLandBlock. This BB branch on the loop endBranchInit register to the 1867// pathes corresponding to the loop exiting branches. 1868 1869template<class PassT> 1870typename CFGStructurizer<PassT>::BlockT * 1871CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep, 1872 BlockTSmallerVector &exitingBlks, 1873 BlockTSmallerVector &exitBlks) { 1874 const AMDILInstrInfo *tii = 1875 static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo()); 1876 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1877 1878 RegiT endBranchReg = static_cast<int> 1879 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1880 assert(endBranchReg >= 0); 1881 1882 // reg = 0 before entering the loop 1883 addLoopEndbranchInitReg(loopRep, endBranchReg); 1884 1885 uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size()); 1886 assert(numBlks >=2 && numBlks == exitBlks.size()); 1887 1888 BlockT *preExitingBlk = exitingBlks[0]; 1889 BlockT *preExitBlk = exitBlks[0]; 1890 BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock(); 1891 funcRep->push_back(preBranchBlk); //insert to function 1892 SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: "); 1893 1894 BlockT *newLandBlk = preBranchBlk; 1895 1896 CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk, 1897 newLandBlk); 1898 preExitingBlk->removeSuccessor(preExitBlk); 1899 preExitingBlk->addSuccessor(newLandBlk); 1900 1901 //it is redundant to add reg = 0 to exitingBlks[0] 1902 1903 // For 1..n th exiting path (the last iteration handles two pathes) create the 1904 // branch to the previous path and the current path. 1905 for (uint32_t i = 1; i < numBlks; ++i) { 1906 BlockT *curExitingBlk = exitingBlks[i]; 1907 BlockT *curExitBlk = exitBlks[i]; 1908 BlockT *curBranchBlk; 1909 1910 if (i == numBlks - 1) { 1911 curBranchBlk = curExitBlk; 1912 } else { 1913 curBranchBlk = funcRep->CreateMachineBasicBlock(); 1914 funcRep->push_back(curBranchBlk); //insert to function 1915 SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: "); 1916 } 1917 1918 // Add reg = i to exitingBlks[i]. 1919 CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep, 1920 endBranchReg, i); 1921 1922 // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge 1923 // (exitingBlks[i], newLandBlk). 1924 CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk, 1925 newLandBlk); 1926 curExitingBlk->removeSuccessor(curExitBlk); 1927 curExitingBlk->addSuccessor(newLandBlk); 1928 1929 // add to preBranchBlk the branch instruction: 1930 // if (endBranchReg == preVal) 1931 // preExitBlk 1932 // else 1933 // curBranchBlk 1934 // 1935 // preValReg = i - 1 1936 1937 DebugLoc DL; 1938 RegiT preValReg = static_cast<int> 1939 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1940 1941 preBranchBlk->insert(preBranchBlk->begin(), 1942 tii->getMovImmInstr(preBranchBlk->getParent(), preValReg, 1943 i - 1)); 1944 1945 // condResReg = (endBranchReg == preValReg) 1946 RegiT condResReg = static_cast<int> 1947 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1948 BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg) 1949 .addReg(endBranchReg).addReg(preValReg); 1950 1951 BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32)) 1952 .addMBB(preExitBlk).addReg(condResReg); 1953 1954 preBranchBlk->addSuccessor(preExitBlk); 1955 preBranchBlk->addSuccessor(curBranchBlk); 1956 1957 // Update preExitingBlk, preExitBlk, preBranchBlk. 1958 preExitingBlk = curExitingBlk; 1959 preExitBlk = curExitBlk; 1960 preBranchBlk = curBranchBlk; 1961 1962 } //end for 1 .. n blocks 1963 1964 return newLandBlk; 1965} //addLoopEndbranchBlock 1966 1967template<class PassT> 1968typename CFGStructurizer<PassT>::PathToKind 1969CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk, 1970 bool allowSideEntry) { 1971 assert(dstBlk); 1972 1973 if (srcBlk == dstBlk) { 1974 return SinglePath_InPath; 1975 } 1976 1977 while (srcBlk && srcBlk->succ_size() == 1) { 1978 srcBlk = *srcBlk->succ_begin(); 1979 if (srcBlk == dstBlk) { 1980 return SinglePath_InPath; 1981 } 1982 1983 if (!allowSideEntry && srcBlk->pred_size() > 1) { 1984 return Not_SinglePath; 1985 } 1986 } 1987 1988 if (srcBlk && srcBlk->succ_size()==0) { 1989 return SinglePath_NotInPath; 1990 } 1991 1992 return Not_SinglePath; 1993} //singlePathTo 1994 1995// If there is a single path from srcBlk to dstBlk, return the last block before 1996// dstBlk If there is a single path from srcBlk->end without dstBlk, return the 1997// last block in the path Otherwise, return NULL 1998template<class PassT> 1999typename CFGStructurizer<PassT>::BlockT * 2000CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk, 2001 bool allowSideEntry) { 2002 assert(dstBlk); 2003 2004 if (srcBlk == dstBlk) { 2005 return srcBlk; 2006 } 2007 2008 if (srcBlk->succ_size() == 0) { 2009 return srcBlk; 2010 } 2011 2012 while (srcBlk && srcBlk->succ_size() == 1) { 2013 BlockT *preBlk = srcBlk; 2014 2015 srcBlk = *srcBlk->succ_begin(); 2016 if (srcBlk == NULL) { 2017 return preBlk; 2018 } 2019 2020 if (!allowSideEntry && srcBlk->pred_size() > 1) { 2021 return NULL; 2022 } 2023 } 2024 2025 if (srcBlk && srcBlk->succ_size()==0) { 2026 return srcBlk; 2027 } 2028 2029 return NULL; 2030 2031} //singlePathEnd 2032 2033template<class PassT> 2034int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk, 2035 BlockT *dstBlk) { 2036 int cloned = 0; 2037 assert(preBlk->isSuccessor(srcBlk)); 2038 while (srcBlk && srcBlk != dstBlk) { 2039 assert(srcBlk->succ_size() == 1); 2040 if (srcBlk->pred_size() > 1) { 2041 srcBlk = cloneBlockForPredecessor(srcBlk, preBlk); 2042 ++cloned; 2043 } 2044 2045 preBlk = srcBlk; 2046 srcBlk = *srcBlk->succ_begin(); 2047 } 2048 2049 return cloned; 2050} //cloneOnSideEntryTo 2051 2052template<class PassT> 2053typename CFGStructurizer<PassT>::BlockT * 2054CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk, 2055 BlockT *predBlk) { 2056 assert(predBlk->isSuccessor(curBlk) && 2057 "succBlk is not a prececessor of curBlk"); 2058 2059 BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions 2060 CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk); 2061 //srcBlk, oldBlk, newBlk 2062 2063 predBlk->removeSuccessor(curBlk); 2064 predBlk->addSuccessor(cloneBlk); 2065 2066 // add all successor to cloneBlk 2067 CFGTraits::cloneSuccessorList(cloneBlk, curBlk); 2068 2069 numClonedInstr += curBlk->size(); 2070 2071 if (DEBUGME) { 2072 errs() << "Cloned block: " << "BB" 2073 << curBlk->getNumber() << "size " << curBlk->size() << "\n"; 2074 } 2075 2076 SHOWNEWBLK(cloneBlk, "result of Cloned block: "); 2077 2078 return cloneBlk; 2079} //cloneBlockForPredecessor 2080 2081template<class PassT> 2082typename CFGStructurizer<PassT>::BlockT * 2083CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep, 2084 BlockT *exitingBlk) { 2085 BlockT *exitBlk = NULL; 2086 2087 for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(), 2088 iterSuccEnd = exitingBlk->succ_end(); 2089 iterSucc != iterSuccEnd; ++iterSucc) { 2090 BlockT *curBlk = *iterSucc; 2091 if (!loopRep->contains(curBlk)) { 2092 assert(exitBlk == NULL); 2093 exitBlk = curBlk; 2094 } 2095 } 2096 2097 assert(exitBlk != NULL); 2098 2099 return exitBlk; 2100} //exitingBlock2ExitBlock 2101 2102template<class PassT> 2103void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk, 2104 BlockT *dstBlk, 2105 InstrIterator insertPos) { 2106 InstrIterator spliceEnd; 2107 //look for the input branchinstr, not the AMDIL branchinstr 2108 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); 2109 if (branchInstr == NULL) { 2110 if (DEBUGME) { 2111 errs() << "migrateInstruction don't see branch instr\n" ; 2112 } 2113 spliceEnd = srcBlk->end(); 2114 } else { 2115 if (DEBUGME) { 2116 errs() << "migrateInstruction see branch instr\n" ; 2117 branchInstr->dump(); 2118 } 2119 spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr); 2120 } 2121 if (DEBUGME) { 2122 errs() << "migrateInstruction before splice dstSize = " << dstBlk->size() 2123 << "srcSize = " << srcBlk->size() << "\n"; 2124 } 2125 2126 //splice insert before insertPos 2127 dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd); 2128 2129 if (DEBUGME) { 2130 errs() << "migrateInstruction after splice dstSize = " << dstBlk->size() 2131 << "srcSize = " << srcBlk->size() << "\n"; 2132 } 2133} //migrateInstruction 2134 2135// normalizeInfiniteLoopExit change 2136// B1: 2137// uncond_br LoopHeader 2138// 2139// to 2140// B1: 2141// cond_br 1 LoopHeader dummyExit 2142// and return the newly added dummy exit block 2143// 2144template<class PassT> 2145typename CFGStructurizer<PassT>::BlockT * 2146CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) { 2147 BlockT *loopHeader; 2148 BlockT *loopLatch; 2149 loopHeader = LoopRep->getHeader(); 2150 loopLatch = LoopRep->getLoopLatch(); 2151 BlockT *dummyExitBlk = NULL; 2152 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 2153 if (loopHeader!=NULL && loopLatch!=NULL) { 2154 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch); 2155 if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) { 2156 dummyExitBlk = funcRep->CreateMachineBasicBlock(); 2157 funcRep->push_back(dummyExitBlk); //insert to function 2158 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 2159 2160 if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n"; 2161 2162 typename BlockT::iterator insertPos = 2163 CFGTraits::getInstrPos(loopLatch, branchInstr); 2164 unsigned immReg = 2165 funcRep->getRegInfo().createVirtualRegister(I32RC); 2166 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1); 2167 InstrT *newInstr = 2168 CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep); 2169 MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false); 2170 2171 SHOWNEWINSTR(newInstr); 2172 2173 branchInstr->eraseFromParent(); 2174 loopLatch->addSuccessor(dummyExitBlk); 2175 } 2176 } 2177 2178 return dummyExitBlk; 2179} //normalizeInfiniteLoopExit 2180 2181template<class PassT> 2182void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) { 2183 InstrT *branchInstr; 2184 2185 // I saw two unconditional branch in one basic block in example 2186 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 2187 while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) 2188 && CFGTraits::isUncondBranch(branchInstr)) { 2189 if (DEBUGME) { 2190 errs() << "Removing unconditional branch instruction" ; 2191 branchInstr->dump(); 2192 } 2193 branchInstr->eraseFromParent(); 2194 } 2195} //removeUnconditionalBranch 2196 2197template<class PassT> 2198void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) { 2199 if (srcBlk->succ_size() == 2) { 2200 BlockT *blk1 = *srcBlk->succ_begin(); 2201 BlockT *blk2 = *(++srcBlk->succ_begin()); 2202 2203 if (blk1 == blk2) { 2204 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); 2205 assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); 2206 if (DEBUGME) { 2207 errs() << "Removing unneeded conditional branch instruction" ; 2208 branchInstr->dump(); 2209 } 2210 branchInstr->eraseFromParent(); 2211 SHOWNEWBLK(blk1, "Removing redundant successor"); 2212 srcBlk->removeSuccessor(blk1); 2213 } 2214 } 2215} //removeRedundantConditionalBranch 2216 2217template<class PassT> 2218void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*, 2219 DEFAULT_VEC_SLOTS> &retBlks) { 2220 BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); 2221 funcRep->push_back(dummyExitBlk); //insert to function 2222 CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); 2223 2224 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter = 2225 retBlks.begin(), 2226 iterEnd = retBlks.end(); iter != iterEnd; ++iter) { 2227 BlockT *curBlk = *iter; 2228 InstrT *curInstr = CFGTraits::getReturnInstr(curBlk); 2229 if (curInstr) { 2230 curInstr->eraseFromParent(); 2231 } 2232#if 0 2233 if (curBlk->size()==0 && curBlk->pred_size() == 1) { 2234 if (DEBUGME) { 2235 errs() << "Replace empty block BB" << curBlk->getNumber() 2236 << " with dummyExitBlock\n"; 2237 } 2238 BlockT *predb = *curBlk->pred_begin(); 2239 predb->removeSuccessor(curBlk); 2240 curBlk = predb; 2241 } //handle empty curBlk 2242#endif 2243 curBlk->addSuccessor(dummyExitBlk); 2244 if (DEBUGME) { 2245 errs() << "Add dummyExitBlock to BB" << curBlk->getNumber() 2246 << " successors\n"; 2247 } 2248 } //for 2249 2250 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); 2251} //addDummyExitBlock 2252 2253template<class PassT> 2254void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) { 2255 while (srcBlk->succ_size()) { 2256 srcBlk->removeSuccessor(*srcBlk->succ_begin()); 2257 } 2258} 2259 2260template<class PassT> 2261void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) { 2262 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; 2263 2264 if (srcBlkInfo == NULL) { 2265 srcBlkInfo = new BlockInfo(); 2266 } 2267 2268 srcBlkInfo->sccNum = sccNum; 2269} 2270 2271template<class PassT> 2272int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) { 2273 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; 2274 return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM; 2275} 2276 2277template<class PassT> 2278void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { 2279 if (DEBUGME) { 2280 errs() << "Retiring BB" << srcBlk->getNumber() << "\n"; 2281 } 2282 2283 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; 2284 2285 if (srcBlkInfo == NULL) { 2286 srcBlkInfo = new BlockInfo(); 2287 } 2288 2289 srcBlkInfo->isRetired = true; 2290 //int i = srcBlk->succ_size(); 2291 //int j = srcBlk->pred_size(); 2292 assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0 2293 && "can't retire block yet"); 2294} 2295 2296template<class PassT> 2297bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) { 2298 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; 2299 return (srcBlkInfo && srcBlkInfo->isRetired); 2300} 2301 2302template<class PassT> 2303bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) { 2304 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 2305 while (loopRep && loopRep->getHeader() == curBlk) { 2306 LoopLandInfo *loopLand = getLoopLandInfo(loopRep); 2307 2308 if(loopLand == NULL) 2309 return true; 2310 2311 BlockT *landBlk = loopLand->landBlk; 2312 assert(landBlk); 2313 if (!isRetiredBlock(landBlk)) { 2314 return true; 2315 } 2316 2317 loopRep = loopRep->getParentLoop(); 2318 } 2319 2320 return false; 2321} //isActiveLoophead 2322 2323template<class PassT> 2324bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) { 2325 const unsigned blockSizeThreshold = 30; 2326 const unsigned cloneInstrThreshold = 100; 2327 2328 bool multiplePreds = blk && (blk->pred_size() > 1); 2329 2330 if(!multiplePreds) 2331 return false; 2332 2333 unsigned blkSize = blk->size(); 2334 return ((blkSize > blockSizeThreshold) 2335 && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold)); 2336} //needMigrateBlock 2337 2338template<class PassT> 2339typename CFGStructurizer<PassT>::BlockT * 2340CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk, 2341 BlockTSmallerVector &exitBlks, 2342 std::set<BlockT *> &exitBlkSet) { 2343 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks 2344 2345 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), 2346 predIterEnd = landBlk->pred_end(); 2347 predIter != predIterEnd; ++predIter) { 2348 BlockT *curBlk = *predIter; 2349 if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) { 2350 inpathBlks.push_back(curBlk); 2351 } 2352 } //for 2353 2354 //if landBlk has predecessors that are not in the given loop, 2355 //create a new block 2356 BlockT *newLandBlk = landBlk; 2357 if (inpathBlks.size() != landBlk->pred_size()) { 2358 newLandBlk = funcRep->CreateMachineBasicBlock(); 2359 funcRep->push_back(newLandBlk); //insert to function 2360 newLandBlk->addSuccessor(landBlk); 2361 for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter = 2362 inpathBlks.begin(), 2363 iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) { 2364 BlockT *curBlk = *iter; 2365 CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk); 2366 //srcBlk, oldBlk, newBlk 2367 curBlk->removeSuccessor(landBlk); 2368 curBlk->addSuccessor(newLandBlk); 2369 } 2370 for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) { 2371 if (exitBlks[i] == landBlk) { 2372 exitBlks[i] = newLandBlk; 2373 } 2374 } 2375 SHOWNEWBLK(newLandBlk, "NewLandingBlock: "); 2376 } 2377 2378 setLoopLandBlock(loopRep, newLandBlk); 2379 2380 return newLandBlk; 2381} // recordLoopbreakLand 2382 2383template<class PassT> 2384void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { 2385 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2386 2387 if (theEntry == NULL) { 2388 theEntry = new LoopLandInfo(); 2389 } 2390 assert(theEntry->landBlk == NULL); 2391 2392 if (blk == NULL) { 2393 blk = funcRep->CreateMachineBasicBlock(); 2394 funcRep->push_back(blk); //insert to function 2395 SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: "); 2396 } 2397 2398 theEntry->landBlk = blk; 2399 2400 if (DEBUGME) { 2401 errs() << "setLoopLandBlock loop-header = BB" 2402 << loopRep->getHeader()->getNumber() 2403 << " landing-block = BB" << blk->getNumber() << "\n"; 2404 } 2405} // setLoopLandBlock 2406 2407template<class PassT> 2408void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { 2409 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2410 2411 if (theEntry == NULL) { 2412 theEntry = new LoopLandInfo(); 2413 } 2414 2415 theEntry->breakOnRegs.insert(regNum); 2416 2417 if (DEBUGME) { 2418 errs() << "addLoopBreakOnReg loop-header = BB" 2419 << loopRep->getHeader()->getNumber() 2420 << " regNum = " << regNum << "\n"; 2421 } 2422} // addLoopBreakOnReg 2423 2424template<class PassT> 2425void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { 2426 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2427 2428 if (theEntry == NULL) { 2429 theEntry = new LoopLandInfo(); 2430 } 2431 theEntry->contOnRegs.insert(regNum); 2432 2433 if (DEBUGME) { 2434 errs() << "addLoopContOnReg loop-header = BB" 2435 << loopRep->getHeader()->getNumber() 2436 << " regNum = " << regNum << "\n"; 2437 } 2438} // addLoopContOnReg 2439 2440template<class PassT> 2441void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { 2442 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2443 2444 if (theEntry == NULL) { 2445 theEntry = new LoopLandInfo(); 2446 } 2447 theEntry->breakInitRegs.insert(regNum); 2448 2449 if (DEBUGME) { 2450 errs() << "addLoopBreakInitReg loop-header = BB" 2451 << loopRep->getHeader()->getNumber() 2452 << " regNum = " << regNum << "\n"; 2453 } 2454} // addLoopBreakInitReg 2455 2456template<class PassT> 2457void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { 2458 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2459 2460 if (theEntry == NULL) { 2461 theEntry = new LoopLandInfo(); 2462 } 2463 theEntry->contInitRegs.insert(regNum); 2464 2465 if (DEBUGME) { 2466 errs() << "addLoopContInitReg loop-header = BB" 2467 << loopRep->getHeader()->getNumber() 2468 << " regNum = " << regNum << "\n"; 2469 } 2470} // addLoopContInitReg 2471 2472template<class PassT> 2473void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep, 2474 RegiT regNum) { 2475 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2476 2477 if (theEntry == NULL) { 2478 theEntry = new LoopLandInfo(); 2479 } 2480 theEntry->endbranchInitRegs.insert(regNum); 2481 2482 if (DEBUGME) 2483 { 2484 errs() << "addLoopEndbranchInitReg loop-header = BB" 2485 << loopRep->getHeader()->getNumber() 2486 << " regNum = " << regNum << "\n"; 2487 } 2488} // addLoopEndbranchInitReg 2489 2490template<class PassT> 2491typename CFGStructurizer<PassT>::LoopLandInfo * 2492CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) { 2493 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2494 2495 return theEntry; 2496} // getLoopLandInfo 2497 2498template<class PassT> 2499typename CFGStructurizer<PassT>::BlockT * 2500CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) { 2501 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2502 2503 return theEntry ? theEntry->landBlk : NULL; 2504} // getLoopLandBlock 2505 2506 2507template<class PassT> 2508bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) { 2509 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 2510 if (loopRep == NULL) 2511 return false; 2512 2513 BlockT *loopHeader = loopRep->getHeader(); 2514 2515 return curBlk->isSuccessor(loopHeader); 2516 2517} //hasBackEdge 2518 2519template<class PassT> 2520unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) { 2521 return loopRep ? loopRep->getLoopDepth() : 0; 2522} //getLoopDepth 2523 2524template<class PassT> 2525int CFGStructurizer<PassT>::countActiveBlock 2526(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart, 2527 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) { 2528 int count = 0; 2529 while (iterStart != iterEnd) { 2530 if (!isRetiredBlock(*iterStart)) { 2531 ++count; 2532 } 2533 ++iterStart; 2534 } 2535 2536 return count; 2537} //countActiveBlock 2538 2539// This is work around solution for findNearestCommonDominator not avaiable to 2540// post dom a proper fix should go to Dominators.h. 2541 2542template<class PassT> 2543typename CFGStructurizer<PassT>::BlockT* 2544CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) { 2545 2546 if (postDomTree->dominates(blk1, blk2)) { 2547 return blk1; 2548 } 2549 if (postDomTree->dominates(blk2, blk1)) { 2550 return blk2; 2551 } 2552 2553 DomTreeNodeT *node1 = postDomTree->getNode(blk1); 2554 DomTreeNodeT *node2 = postDomTree->getNode(blk2); 2555 2556 // Handle newly cloned node. 2557 if (node1 == NULL && blk1->succ_size() == 1) { 2558 return findNearestCommonPostDom(*blk1->succ_begin(), blk2); 2559 } 2560 if (node2 == NULL && blk2->succ_size() == 1) { 2561 return findNearestCommonPostDom(blk1, *blk2->succ_begin()); 2562 } 2563 2564 if (node1 == NULL || node2 == NULL) { 2565 return NULL; 2566 } 2567 2568 node1 = node1->getIDom(); 2569 while (node1) { 2570 if (postDomTree->dominates(node1, node2)) { 2571 return node1->getBlock(); 2572 } 2573 node1 = node1->getIDom(); 2574 } 2575 2576 return NULL; 2577} 2578 2579template<class PassT> 2580typename CFGStructurizer<PassT>::BlockT * 2581CFGStructurizer<PassT>::findNearestCommonPostDom 2582(typename std::set<BlockT *> &blks) { 2583 BlockT *commonDom; 2584 typename std::set<BlockT *>::const_iterator iter = blks.begin(); 2585 typename std::set<BlockT *>::const_iterator iterEnd = blks.end(); 2586 for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) { 2587 BlockT *curBlk = *iter; 2588 if (curBlk != commonDom) { 2589 commonDom = findNearestCommonPostDom(curBlk, commonDom); 2590 } 2591 } 2592 2593 if (DEBUGME) { 2594 errs() << "Common post dominator for exit blocks is "; 2595 if (commonDom) { 2596 errs() << "BB" << commonDom->getNumber() << "\n"; 2597 } else { 2598 errs() << "NULL\n"; 2599 } 2600 } 2601 2602 return commonDom; 2603} //findNearestCommonPostDom 2604 2605} //end namespace llvm 2606 2607//todo: move-end 2608 2609 2610//===----------------------------------------------------------------------===// 2611// 2612// CFGStructurizer for AMDIL 2613// 2614//===----------------------------------------------------------------------===// 2615 2616 2617using namespace llvmCFGStruct; 2618 2619namespace llvm 2620{ 2621class AMDILCFGStructurizer : public MachineFunctionPass 2622{ 2623public: 2624 typedef MachineInstr InstructionType; 2625 typedef MachineFunction FunctionType; 2626 typedef MachineBasicBlock BlockType; 2627 typedef MachineLoopInfo LoopinfoType; 2628 typedef MachineDominatorTree DominatortreeType; 2629 typedef MachinePostDominatorTree PostDominatortreeType; 2630 typedef MachineDomTreeNode DomTreeNodeType; 2631 typedef MachineLoop LoopType; 2632 2633protected: 2634 TargetMachine &TM; 2635 const TargetInstrInfo *TII; 2636 const AMDGPURegisterInfo *TRI; 2637 2638public: 2639 AMDILCFGStructurizer(char &pid, TargetMachine &tm AMDIL_OPT_LEVEL_DECL); 2640 const TargetInstrInfo *getTargetInstrInfo() const; 2641 //bool runOnMachineFunction(MachineFunction &F); 2642 2643private: 2644 2645}; //end of class AMDILCFGStructurizer 2646 2647//char AMDILCFGStructurizer::ID = 0; 2648} //end of namespace llvm 2649AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid, TargetMachine &tm 2650 AMDIL_OPT_LEVEL_DECL) 2651: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()), 2652 TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo()) 2653 ) { 2654} 2655 2656const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const { 2657 return TII; 2658} 2659//===----------------------------------------------------------------------===// 2660// 2661// CFGPrepare 2662// 2663//===----------------------------------------------------------------------===// 2664 2665 2666using namespace llvmCFGStruct; 2667 2668namespace llvm 2669{ 2670class AMDILCFGPrepare : public AMDILCFGStructurizer 2671{ 2672public: 2673 static char ID; 2674 2675public: 2676 AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); 2677 2678 virtual const char *getPassName() const; 2679 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 2680 2681 bool runOnMachineFunction(MachineFunction &F); 2682 2683private: 2684 2685}; //end of class AMDILCFGPrepare 2686 2687char AMDILCFGPrepare::ID = 0; 2688} //end of namespace llvm 2689 2690AMDILCFGPrepare::AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) 2691 : AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR) 2692{ 2693} 2694const char *AMDILCFGPrepare::getPassName() const { 2695 return "AMD IL Control Flow Graph Preparation Pass"; 2696} 2697 2698void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const { 2699 AU.addPreserved<MachineFunctionAnalysis>(); 2700 AU.addRequired<MachineFunctionAnalysis>(); 2701 AU.addRequired<MachineDominatorTree>(); 2702 AU.addRequired<MachinePostDominatorTree>(); 2703 AU.addRequired<MachineLoopInfo>(); 2704} 2705 2706//===----------------------------------------------------------------------===// 2707// 2708// CFGPerform 2709// 2710//===----------------------------------------------------------------------===// 2711 2712 2713using namespace llvmCFGStruct; 2714 2715namespace llvm 2716{ 2717class AMDILCFGPerform : public AMDILCFGStructurizer 2718{ 2719public: 2720 static char ID; 2721 2722public: 2723 AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); 2724 virtual const char *getPassName() const; 2725 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 2726 bool runOnMachineFunction(MachineFunction &F); 2727 2728private: 2729 2730}; //end of class AMDILCFGPerform 2731 2732char AMDILCFGPerform::ID = 0; 2733} //end of namespace llvm 2734 2735 AMDILCFGPerform::AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) 2736: AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR) 2737{ 2738} 2739 2740const char *AMDILCFGPerform::getPassName() const { 2741 return "AMD IL Control Flow Graph structurizer Pass"; 2742} 2743 2744void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const { 2745 AU.addPreserved<MachineFunctionAnalysis>(); 2746 AU.addRequired<MachineFunctionAnalysis>(); 2747 AU.addRequired<MachineDominatorTree>(); 2748 AU.addRequired<MachinePostDominatorTree>(); 2749 AU.addRequired<MachineLoopInfo>(); 2750} 2751 2752//===----------------------------------------------------------------------===// 2753// 2754// CFGStructTraits<AMDILCFGStructurizer> 2755// 2756//===----------------------------------------------------------------------===// 2757 2758namespace llvmCFGStruct 2759{ 2760// this class is tailor to the AMDIL backend 2761template<> 2762struct CFGStructTraits<AMDILCFGStructurizer> 2763{ 2764 typedef int RegiT; 2765 2766 static int getBreakNzeroOpcode(int oldOpcode) { 2767 switch(oldOpcode) { 2768 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ); 2769 default: 2770 assert(0 && "internal error"); 2771 }; 2772 return -1; 2773 } 2774 2775 static int getBreakZeroOpcode(int oldOpcode) { 2776 switch(oldOpcode) { 2777 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ); 2778 default: 2779 assert(0 && "internal error"); 2780 }; 2781 return -1; 2782 } 2783 2784 static int getBranchNzeroOpcode(int oldOpcode) { 2785 switch(oldOpcode) { 2786 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ); 2787 case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ; 2788 default: 2789 assert(0 && "internal error"); 2790 }; 2791 return -1; 2792 } 2793 2794 static int getBranchZeroOpcode(int oldOpcode) { 2795 switch(oldOpcode) { 2796 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ); 2797 case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z; 2798 default: 2799 assert(0 && "internal error"); 2800 }; 2801 return -1; 2802 } 2803 2804 static int getContinueNzeroOpcode(int oldOpcode) 2805 { 2806 switch(oldOpcode) { 2807 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ); 2808 default: 2809 assert(0 && "internal error"); 2810 }; 2811 return -1; 2812 } 2813 2814 static int getContinueZeroOpcode(int oldOpcode) { 2815 switch(oldOpcode) { 2816 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ); 2817 default: 2818 assert(0 && "internal error"); 2819 }; 2820 return -1; 2821 } 2822 2823// the explicitly represented branch target is the true branch target 2824#define getExplicitBranch getTrueBranch 2825#define setExplicitBranch setTrueBranch 2826 2827 static MachineBasicBlock *getTrueBranch(MachineInstr *instr) { 2828 return instr->getOperand(0).getMBB(); 2829 } 2830 2831 static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) { 2832 instr->getOperand(0).setMBB(blk); 2833 } 2834 2835 static MachineBasicBlock * 2836 getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) { 2837 assert(blk->succ_size() == 2); 2838 MachineBasicBlock *trueBranch = getTrueBranch(instr); 2839 MachineBasicBlock::succ_iterator iter = blk->succ_begin(); 2840 MachineBasicBlock::succ_iterator iterNext = iter; 2841 ++iterNext; 2842 2843 return (*iter == trueBranch) ? *iterNext : *iter; 2844 } 2845 2846 static bool isCondBranch(MachineInstr *instr) { 2847 switch (instr->getOpcode()) { 2848 ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); 2849 case AMDGPU::SI_IF_NZ: 2850 case AMDGPU::SI_IF_Z: 2851 break; 2852 default: 2853 return false; 2854 } 2855 return true; 2856 } 2857 2858 static bool isUncondBranch(MachineInstr *instr) { 2859 switch (instr->getOpcode()) { 2860 case AMDGPU::BRANCH: 2861 break; 2862 default: 2863 return false; 2864 } 2865 return true; 2866 } 2867 2868 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) { 2869 //get DebugLoc from the first MachineBasicBlock instruction with debug info 2870 DebugLoc DL; 2871 for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) { 2872 MachineInstr *instr = &(*iter); 2873 if (instr->getDebugLoc().isUnknown() == false) { 2874 DL = instr->getDebugLoc(); 2875 } 2876 } 2877 return DL; 2878 } 2879 2880 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) { 2881 MachineBasicBlock::reverse_iterator iter = blk->rbegin(); 2882 MachineInstr *instr = &*iter; 2883 if (instr && (isCondBranch(instr) || isUncondBranch(instr))) { 2884 return instr; 2885 } 2886 return NULL; 2887 } 2888 2889 // The correct naming for this is getPossibleLoopendBlockBranchInstr. 2890 // 2891 // BB with backward-edge could have move instructions after the branch 2892 // instruction. Such move instruction "belong to" the loop backward-edge. 2893 // 2894 static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) { 2895 const AMDILInstrInfo * TII = static_cast<const AMDILInstrInfo *>( 2896 blk->getParent()->getTarget().getInstrInfo()); 2897 2898 for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(), 2899 iterEnd = blk->rend(); iter != iterEnd; ++iter) { 2900 // FIXME: Simplify 2901 MachineInstr *instr = &*iter; 2902 if (instr) { 2903 if (isCondBranch(instr) || isUncondBranch(instr)) { 2904 return instr; 2905 } else if (!TII->isMov(instr->getOpcode())) { 2906 break; 2907 } 2908 } 2909 } 2910 return NULL; 2911 } 2912 2913 static MachineInstr *getReturnInstr(MachineBasicBlock *blk) { 2914 MachineBasicBlock::reverse_iterator iter = blk->rbegin(); 2915 if (iter != blk->rend()) { 2916 MachineInstr *instr = &(*iter); 2917 if (instr->getOpcode() == AMDGPU::RETURN) { 2918 return instr; 2919 } 2920 } 2921 return NULL; 2922 } 2923 2924 static MachineInstr *getContinueInstr(MachineBasicBlock *blk) { 2925 MachineBasicBlock::reverse_iterator iter = blk->rbegin(); 2926 if (iter != blk->rend()) { 2927 MachineInstr *instr = &(*iter); 2928 if (instr->getOpcode() == AMDGPU::CONTINUE) { 2929 return instr; 2930 } 2931 } 2932 return NULL; 2933 } 2934 2935 static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) { 2936 for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) { 2937 MachineInstr *instr = &(*iter); 2938 if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) { 2939 return instr; 2940 } 2941 } 2942 return NULL; 2943 } 2944 2945 static bool isReturnBlock(MachineBasicBlock *blk) { 2946 MachineInstr *instr = getReturnInstr(blk); 2947 bool isReturn = (blk->succ_size() == 0); 2948 if (instr) { 2949 assert(isReturn); 2950 } else if (isReturn) { 2951 if (DEBUGME) { 2952 errs() << "BB" << blk->getNumber() 2953 <<" is return block without RETURN instr\n"; 2954 } 2955 } 2956 2957 return isReturn; 2958 } 2959 2960 static MachineBasicBlock::iterator 2961 getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) { 2962 assert(instr->getParent() == blk && "instruction doesn't belong to block"); 2963 MachineBasicBlock::iterator iter = blk->begin(); 2964 MachineBasicBlock::iterator iterEnd = blk->end(); 2965 while (&(*iter) != instr && iter != iterEnd) { 2966 ++iter; 2967 } 2968 2969 assert(iter != iterEnd); 2970 return iter; 2971 }//getInstrPos 2972 2973 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, 2974 AMDILCFGStructurizer *passRep) { 2975 return insertInstrBefore(blk,newOpcode,passRep,DebugLoc()); 2976 } //insertInstrBefore 2977 2978 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, 2979 AMDILCFGStructurizer *passRep, DebugLoc DL) { 2980 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 2981 MachineInstr *newInstr = 2982 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); 2983 2984 MachineBasicBlock::iterator res; 2985 if (blk->begin() != blk->end()) { 2986 blk->insert(blk->begin(), newInstr); 2987 } else { 2988 blk->push_back(newInstr); 2989 } 2990 2991 SHOWNEWINSTR(newInstr); 2992 2993 return newInstr; 2994 } //insertInstrBefore 2995 2996 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, 2997 AMDILCFGStructurizer *passRep) { 2998 insertInstrEnd(blk,newOpcode,passRep,DebugLoc()); 2999 } //insertInstrEnd 3000 3001 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, 3002 AMDILCFGStructurizer *passRep, DebugLoc DL) { 3003 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 3004 MachineInstr *newInstr = blk->getParent() 3005 ->CreateMachineInstr(tii->get(newOpcode), DL); 3006 3007 blk->push_back(newInstr); 3008 //assume the instruction doesn't take any reg operand ... 3009 3010 SHOWNEWINSTR(newInstr); 3011 } //insertInstrEnd 3012 3013 static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos, 3014 int newOpcode, 3015 AMDILCFGStructurizer *passRep) { 3016 MachineInstr *oldInstr = &(*instrPos); 3017 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 3018 MachineBasicBlock *blk = oldInstr->getParent(); 3019 MachineInstr *newInstr = 3020 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), 3021 DebugLoc()); 3022 3023 blk->insert(instrPos, newInstr); 3024 //assume the instruction doesn't take any reg operand ... 3025 3026 SHOWNEWINSTR(newInstr); 3027 return newInstr; 3028 } //insertInstrBefore 3029 3030 static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos, 3031 int newOpcode, 3032 AMDILCFGStructurizer *passRep, 3033 DebugLoc DL) { 3034 MachineInstr *oldInstr = &(*instrPos); 3035 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 3036 MachineBasicBlock *blk = oldInstr->getParent(); 3037 MachineInstr *newInstr = 3038 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), 3039 DL); 3040 3041 blk->insert(instrPos, newInstr); 3042 MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(), 3043 false); 3044 3045 SHOWNEWINSTR(newInstr); 3046 //erase later oldInstr->eraseFromParent(); 3047 } //insertCondBranchBefore 3048 3049 static void insertCondBranchBefore(MachineBasicBlock *blk, 3050 MachineBasicBlock::iterator insertPos, 3051 int newOpcode, 3052 AMDILCFGStructurizer *passRep, 3053 RegiT regNum, 3054 DebugLoc DL) { 3055 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 3056 3057 MachineInstr *newInstr = 3058 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); 3059 3060 //insert before 3061 blk->insert(insertPos, newInstr); 3062 MachineInstrBuilder(newInstr).addReg(regNum, false); 3063 3064 SHOWNEWINSTR(newInstr); 3065 } //insertCondBranchBefore 3066 3067 static void insertCondBranchEnd(MachineBasicBlock *blk, 3068 int newOpcode, 3069 AMDILCFGStructurizer *passRep, 3070 RegiT regNum) { 3071 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 3072 MachineInstr *newInstr = 3073 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc()); 3074 3075 blk->push_back(newInstr); 3076 MachineInstrBuilder(newInstr).addReg(regNum, false); 3077 3078 SHOWNEWINSTR(newInstr); 3079 } //insertCondBranchEnd 3080 3081 3082 static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos, 3083 AMDILCFGStructurizer *passRep, 3084 RegiT regNum, int regVal) { 3085 MachineInstr *oldInstr = &(*instrPos); 3086 const AMDILInstrInfo *tii = 3087 static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo()); 3088 MachineBasicBlock *blk = oldInstr->getParent(); 3089 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, 3090 regVal); 3091 blk->insert(instrPos, newInstr); 3092 3093 SHOWNEWINSTR(newInstr); 3094 } //insertAssignInstrBefore 3095 3096 static void insertAssignInstrBefore(MachineBasicBlock *blk, 3097 AMDILCFGStructurizer *passRep, 3098 RegiT regNum, int regVal) { 3099 const AMDILInstrInfo *tii = 3100 static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo()); 3101 3102 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, 3103 regVal); 3104 if (blk->begin() != blk->end()) { 3105 blk->insert(blk->begin(), newInstr); 3106 } else { 3107 blk->push_back(newInstr); 3108 } 3109 3110 SHOWNEWINSTR(newInstr); 3111 3112 } //insertInstrBefore 3113 3114 static void insertCompareInstrBefore(MachineBasicBlock *blk, 3115 MachineBasicBlock::iterator instrPos, 3116 AMDILCFGStructurizer *passRep, 3117 RegiT dstReg, RegiT src1Reg, 3118 RegiT src2Reg) { 3119 const AMDILInstrInfo *tii = 3120 static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo()); 3121 MachineInstr *newInstr = 3122 blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc()); 3123 3124 MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target 3125 MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value 3126 MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value 3127 3128 blk->insert(instrPos, newInstr); 3129 SHOWNEWINSTR(newInstr); 3130 3131 } //insertCompareInstrBefore 3132 3133 static void cloneSuccessorList(MachineBasicBlock *dstBlk, 3134 MachineBasicBlock *srcBlk) { 3135 for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(), 3136 iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) { 3137 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of 3138 } 3139 } //cloneSuccessorList 3140 3141 static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) { 3142 MachineFunction *func = srcBlk->getParent(); 3143 MachineBasicBlock *newBlk = func->CreateMachineBasicBlock(); 3144 func->push_back(newBlk); //insert to function 3145 //newBlk->setNumber(srcBlk->getNumber()); 3146 for (MachineBasicBlock::iterator iter = srcBlk->begin(), 3147 iterEnd = srcBlk->end(); 3148 iter != iterEnd; ++iter) { 3149 MachineInstr *instr = func->CloneMachineInstr(iter); 3150 newBlk->push_back(instr); 3151 } 3152 return newBlk; 3153 } 3154 3155 //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because 3156 //the AMDIL instruction is not recognized as terminator fix this and retire 3157 //this routine 3158 static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk, 3159 MachineBasicBlock *oldBlk, 3160 MachineBasicBlock *newBlk) { 3161 MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk); 3162 if (branchInstr && isCondBranch(branchInstr) && 3163 getExplicitBranch(branchInstr) == oldBlk) { 3164 setExplicitBranch(branchInstr, newBlk); 3165 } 3166 } 3167 3168 static void wrapup(MachineBasicBlock *entryBlk) { 3169 assert((!entryBlk->getParent()->getJumpTableInfo() 3170 || entryBlk->getParent()->getJumpTableInfo()->isEmpty()) 3171 && "found a jump table"); 3172 3173 //collect continue right before endloop 3174 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr; 3175 MachineBasicBlock::iterator pre = entryBlk->begin(); 3176 MachineBasicBlock::iterator iterEnd = entryBlk->end(); 3177 MachineBasicBlock::iterator iter = pre; 3178 while (iter != iterEnd) { 3179 if (pre->getOpcode() == AMDGPU::CONTINUE 3180 && iter->getOpcode() == AMDGPU::ENDLOOP) { 3181 contInstr.push_back(pre); 3182 } 3183 pre = iter; 3184 ++iter; 3185 } //end while 3186 3187 //delete continue right before endloop 3188 for (unsigned i = 0; i < contInstr.size(); ++i) { 3189 contInstr[i]->eraseFromParent(); 3190 } 3191 3192 // TODO to fix up jump table so later phase won't be confused. if 3193 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but 3194 // there isn't such an interface yet. alternatively, replace all the other 3195 // blocks in the jump table with the entryBlk //} 3196 3197 } //wrapup 3198 3199 static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) { 3200 return &pass.getAnalysis<MachineDominatorTree>(); 3201 } 3202 3203 static MachinePostDominatorTree* 3204 getPostDominatorTree(AMDILCFGStructurizer &pass) { 3205 return &pass.getAnalysis<MachinePostDominatorTree>(); 3206 } 3207 3208 static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) { 3209 return &pass.getAnalysis<MachineLoopInfo>(); 3210 } 3211}; // template class CFGStructTraits 3212} //end of namespace llvm 3213 3214// createAMDILCFGPreparationPass- Returns a pass 3215FunctionPass *llvm::createAMDILCFGPreparationPass(TargetMachine &tm 3216 AMDIL_OPT_LEVEL_DECL) { 3217 return new AMDILCFGPrepare(tm AMDIL_OPT_LEVEL_VAR); 3218} 3219 3220bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func) { 3221 return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().prepare(func, 3222 *this, 3223 TRI); 3224} 3225 3226// createAMDILCFGStructurizerPass- Returns a pass 3227FunctionPass *llvm::createAMDILCFGStructurizerPass(TargetMachine &tm 3228 AMDIL_OPT_LEVEL_DECL) { 3229 return new AMDILCFGPerform(tm AMDIL_OPT_LEVEL_VAR); 3230} 3231 3232bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func) { 3233 return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().run(func, 3234 *this, 3235 TRI); 3236} 3237 3238//end of file newline goes below 3239 3240