//===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements a CFG stacking pass.
///
/// This pass reorders the blocks in a function to put them into topological
/// order, ignoring loop backedges, and without any loop being interrupted
/// by a block not dominated by the loop header, with special care to keep the
/// order as similar as possible to the original order.
///
/// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since
/// scope boundaries serve as the labels for WebAssembly's control transfers.
///
/// This is sufficient to convert arbitrary CFGs into a form that works on
/// WebAssembly, provided that all loops are single-entry.
23/// 24//===----------------------------------------------------------------------===// 25 26#include "WebAssembly.h" 27#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 28#include "WebAssemblyMachineFunctionInfo.h" 29#include "WebAssemblySubtarget.h" 30#include "llvm/ADT/PriorityQueue.h" 31#include "llvm/ADT/SetVector.h" 32#include "llvm/CodeGen/MachineDominators.h" 33#include "llvm/CodeGen/MachineFunction.h" 34#include "llvm/CodeGen/MachineInstrBuilder.h" 35#include "llvm/CodeGen/MachineLoopInfo.h" 36#include "llvm/CodeGen/MachineRegisterInfo.h" 37#include "llvm/CodeGen/Passes.h" 38#include "llvm/Support/Debug.h" 39#include "llvm/Support/raw_ostream.h" 40using namespace llvm; 41 42#define DEBUG_TYPE "wasm-cfg-stackify" 43 44namespace { 45class WebAssemblyCFGStackify final : public MachineFunctionPass { 46 const char *getPassName() const override { 47 return "WebAssembly CFG Stackify"; 48 } 49 50 void getAnalysisUsage(AnalysisUsage &AU) const override { 51 AU.setPreservesCFG(); 52 AU.addRequired<MachineDominatorTree>(); 53 AU.addPreserved<MachineDominatorTree>(); 54 AU.addRequired<MachineLoopInfo>(); 55 AU.addPreserved<MachineLoopInfo>(); 56 MachineFunctionPass::getAnalysisUsage(AU); 57 } 58 59 bool runOnMachineFunction(MachineFunction &MF) override; 60 61public: 62 static char ID; // Pass identification, replacement for typeid 63 WebAssemblyCFGStackify() : MachineFunctionPass(ID) {} 64}; 65} // end anonymous namespace 66 67char WebAssemblyCFGStackify::ID = 0; 68FunctionPass *llvm::createWebAssemblyCFGStackify() { 69 return new WebAssemblyCFGStackify(); 70} 71 72/// Return the "bottom" block of a loop. This differs from 73/// MachineLoop::getBottomBlock in that it works even if the loop is 74/// discontiguous. 
75static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) { 76 MachineBasicBlock *Bottom = Loop->getHeader(); 77 for (MachineBasicBlock *MBB : Loop->blocks()) 78 if (MBB->getNumber() > Bottom->getNumber()) 79 Bottom = MBB; 80 return Bottom; 81} 82 83static void MaybeUpdateTerminator(MachineBasicBlock *MBB) { 84#ifndef NDEBUG 85 bool AnyBarrier = false; 86#endif 87 bool AllAnalyzable = true; 88 for (const MachineInstr &Term : MBB->terminators()) { 89#ifndef NDEBUG 90 AnyBarrier |= Term.isBarrier(); 91#endif 92 AllAnalyzable &= Term.isBranch() && !Term.isIndirectBranch(); 93 } 94 assert((AnyBarrier || AllAnalyzable) && 95 "AnalyzeBranch needs to analyze any block with a fallthrough"); 96 if (AllAnalyzable) 97 MBB->updateTerminator(); 98} 99 100namespace { 101/// Sort blocks by their number. 102struct CompareBlockNumbers { 103 bool operator()(const MachineBasicBlock *A, 104 const MachineBasicBlock *B) const { 105 return A->getNumber() > B->getNumber(); 106 } 107}; 108/// Sort blocks by their number in the opposite order.. 109struct CompareBlockNumbersBackwards { 110 bool operator()(const MachineBasicBlock *A, 111 const MachineBasicBlock *B) const { 112 return A->getNumber() < B->getNumber(); 113 } 114}; 115/// Bookkeeping for a loop to help ensure that we don't mix blocks not dominated 116/// by the loop header among the loop's blocks. 117struct Entry { 118 const MachineLoop *Loop; 119 unsigned NumBlocksLeft; 120 121 /// List of blocks not dominated by Loop's header that are deferred until 122 /// after all of Loop's blocks have been seen. 123 std::vector<MachineBasicBlock *> Deferred; 124 125 explicit Entry(const MachineLoop *L) 126 : Loop(L), NumBlocksLeft(L->getNumBlocks()) {} 127}; 128} 129 130/// Sort the blocks, taking special care to make sure that loops are not 131/// interrupted by blocks not dominated by their header. 132/// TODO: There are many opportunities for improving the heuristics here. 133/// Explore them. 
134static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, 135 const MachineDominatorTree &MDT) { 136 // Prepare for a topological sort: Record the number of predecessors each 137 // block has, ignoring loop backedges. 138 MF.RenumberBlocks(); 139 SmallVector<unsigned, 16> NumPredsLeft(MF.getNumBlockIDs(), 0); 140 for (MachineBasicBlock &MBB : MF) { 141 unsigned N = MBB.pred_size(); 142 if (MachineLoop *L = MLI.getLoopFor(&MBB)) 143 if (L->getHeader() == &MBB) 144 for (const MachineBasicBlock *Pred : MBB.predecessors()) 145 if (L->contains(Pred)) 146 --N; 147 NumPredsLeft[MBB.getNumber()] = N; 148 } 149 150 // Topological sort the CFG, with additional constraints: 151 // - Between a loop header and the last block in the loop, there can be 152 // no blocks not dominated by the loop header. 153 // - It's desirable to preserve the original block order when possible. 154 // We use two ready lists; Preferred and Ready. Preferred has recently 155 // processed sucessors, to help preserve block sequences from the original 156 // order. Ready has the remaining ready blocks. 157 PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>, 158 CompareBlockNumbers> 159 Preferred; 160 PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>, 161 CompareBlockNumbersBackwards> 162 Ready; 163 SmallVector<Entry, 4> Loops; 164 for (MachineBasicBlock *MBB = &MF.front();;) { 165 const MachineLoop *L = MLI.getLoopFor(MBB); 166 if (L) { 167 // If MBB is a loop header, add it to the active loop list. We can't put 168 // any blocks that it doesn't dominate until we see the end of the loop. 169 if (L->getHeader() == MBB) 170 Loops.push_back(Entry(L)); 171 // For each active loop the block is in, decrement the count. If MBB is 172 // the last block in an active loop, take it off the list and pick up any 173 // blocks deferred because the header didn't dominate them. 
174 for (Entry &E : Loops) 175 if (E.Loop->contains(MBB) && --E.NumBlocksLeft == 0) 176 for (auto DeferredBlock : E.Deferred) 177 Ready.push(DeferredBlock); 178 while (!Loops.empty() && Loops.back().NumBlocksLeft == 0) 179 Loops.pop_back(); 180 } 181 // The main topological sort logic. 182 for (MachineBasicBlock *Succ : MBB->successors()) { 183 // Ignore backedges. 184 if (MachineLoop *SuccL = MLI.getLoopFor(Succ)) 185 if (SuccL->getHeader() == Succ && SuccL->contains(MBB)) 186 continue; 187 // Decrement the predecessor count. If it's now zero, it's ready. 188 if (--NumPredsLeft[Succ->getNumber()] == 0) 189 Preferred.push(Succ); 190 } 191 // Determine the block to follow MBB. First try to find a preferred block, 192 // to preserve the original block order when possible. 193 MachineBasicBlock *Next = nullptr; 194 while (!Preferred.empty()) { 195 Next = Preferred.top(); 196 Preferred.pop(); 197 // If X isn't dominated by the top active loop header, defer it until that 198 // loop is done. 199 if (!Loops.empty() && 200 !MDT.dominates(Loops.back().Loop->getHeader(), Next)) { 201 Loops.back().Deferred.push_back(Next); 202 Next = nullptr; 203 continue; 204 } 205 // If Next was originally ordered before MBB, and it isn't because it was 206 // loop-rotated above the header, it's not preferred. 207 if (Next->getNumber() < MBB->getNumber() && 208 (!L || !L->contains(Next) || 209 L->getHeader()->getNumber() < Next->getNumber())) { 210 Ready.push(Next); 211 Next = nullptr; 212 continue; 213 } 214 break; 215 } 216 // If we didn't find a suitable block in the Preferred list, check the 217 // general Ready list. 218 if (!Next) { 219 // If there are no more blocks to process, we're done. 220 if (Ready.empty()) { 221 MaybeUpdateTerminator(MBB); 222 break; 223 } 224 for (;;) { 225 Next = Ready.top(); 226 Ready.pop(); 227 // If Next isn't dominated by the top active loop header, defer it until 228 // that loop is done. 
229 if (!Loops.empty() && 230 !MDT.dominates(Loops.back().Loop->getHeader(), Next)) { 231 Loops.back().Deferred.push_back(Next); 232 continue; 233 } 234 break; 235 } 236 } 237 // Move the next block into place and iterate. 238 Next->moveAfter(MBB); 239 MaybeUpdateTerminator(MBB); 240 MBB = Next; 241 } 242 assert(Loops.empty() && "Active loop list not finished"); 243 MF.RenumberBlocks(); 244 245#ifndef NDEBUG 246 SmallSetVector<MachineLoop *, 8> OnStack; 247 248 // Insert a sentinel representing the degenerate loop that starts at the 249 // function entry block and includes the entire function as a "loop" that 250 // executes once. 251 OnStack.insert(nullptr); 252 253 for (auto &MBB : MF) { 254 assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative."); 255 256 MachineLoop *Loop = MLI.getLoopFor(&MBB); 257 if (Loop && &MBB == Loop->getHeader()) { 258 // Loop header. The loop predecessor should be sorted above, and the other 259 // predecessors should be backedges below. 260 for (auto Pred : MBB.predecessors()) 261 assert( 262 (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) && 263 "Loop header predecessors must be loop predecessors or backedges"); 264 assert(OnStack.insert(Loop) && "Loops should be declared at most once."); 265 } else { 266 // Not a loop header. All predecessors should be sorted above. 
267 for (auto Pred : MBB.predecessors()) 268 assert(Pred->getNumber() < MBB.getNumber() && 269 "Non-loop-header predecessors should be topologically sorted"); 270 assert(OnStack.count(MLI.getLoopFor(&MBB)) && 271 "Blocks must be nested in their loops"); 272 } 273 while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back())) 274 OnStack.pop_back(); 275 } 276 assert(OnStack.pop_back_val() == nullptr && 277 "The function entry block shouldn't actually be a loop header"); 278 assert(OnStack.empty() && 279 "Control flow stack pushes and pops should be balanced."); 280#endif 281} 282 283/// Test whether Pred has any terminators explicitly branching to MBB, as 284/// opposed to falling through. Note that it's possible (eg. in unoptimized 285/// code) for a branch instruction to both branch to a block and fallthrough 286/// to it, so we check the actual branch operands to see if there are any 287/// explicit mentions. 288static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, 289 MachineBasicBlock *MBB) { 290 for (MachineInstr &MI : Pred->terminators()) 291 for (MachineOperand &MO : MI.explicit_operands()) 292 if (MO.isMBB() && MO.getMBB() == MBB) 293 return true; 294 return false; 295} 296 297/// Test whether MI is a child of some other node in an expression tree. 298static bool IsChild(const MachineInstr &MI, 299 const WebAssemblyFunctionInfo &MFI) { 300 if (MI.getNumOperands() == 0) 301 return false; 302 const MachineOperand &MO = MI.getOperand(0); 303 if (!MO.isReg() || MO.isImplicit() || !MO.isDef()) 304 return false; 305 unsigned Reg = MO.getReg(); 306 return TargetRegisterInfo::isVirtualRegister(Reg) && 307 MFI.isVRegStackified(Reg); 308} 309 310/// Insert a BLOCK marker for branches to MBB (if needed). 
311static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, 312 SmallVectorImpl<MachineBasicBlock *> &ScopeTops, 313 const WebAssemblyInstrInfo &TII, 314 const MachineLoopInfo &MLI, 315 MachineDominatorTree &MDT, 316 WebAssemblyFunctionInfo &MFI) { 317 // First compute the nearest common dominator of all forward non-fallthrough 318 // predecessors so that we minimize the time that the BLOCK is on the stack, 319 // which reduces overall stack height. 320 MachineBasicBlock *Header = nullptr; 321 bool IsBranchedTo = false; 322 int MBBNumber = MBB.getNumber(); 323 for (MachineBasicBlock *Pred : MBB.predecessors()) 324 if (Pred->getNumber() < MBBNumber) { 325 Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred; 326 if (ExplicitlyBranchesTo(Pred, &MBB)) 327 IsBranchedTo = true; 328 } 329 if (!Header) 330 return; 331 if (!IsBranchedTo) 332 return; 333 334 assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors"); 335 MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB)); 336 337 // If the nearest common dominator is inside a more deeply nested context, 338 // walk out to the nearest scope which isn't more deeply nested. 339 for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) { 340 if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) { 341 if (ScopeTop->getNumber() > Header->getNumber()) { 342 // Skip over an intervening scope. 343 I = next(MachineFunction::iterator(ScopeTop)); 344 } else { 345 // We found a scope level at an appropriate depth. 346 Header = ScopeTop; 347 break; 348 } 349 } 350 } 351 352 // If there's a loop which ends just before MBB which contains Header, we can 353 // reuse its label instead of inserting a new BLOCK. 
354 for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred); 355 Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop()) 356 if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header)) 357 return; 358 359 // Decide where in Header to put the BLOCK. 360 MachineBasicBlock::iterator InsertPos; 361 MachineLoop *HeaderLoop = MLI.getLoopFor(Header); 362 if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) { 363 // Header is the header of a loop that does not lexically contain MBB, so 364 // the BLOCK needs to be above the LOOP, after any END constructs. 365 InsertPos = Header->begin(); 366 while (InsertPos->getOpcode() != WebAssembly::LOOP) 367 ++InsertPos; 368 } else { 369 // Otherwise, insert the BLOCK as late in Header as we can, but before the 370 // beginning of the local expression tree and any nested BLOCKs. 371 InsertPos = Header->getFirstTerminator(); 372 while (InsertPos != Header->begin() && IsChild(*prev(InsertPos), MFI) && 373 prev(InsertPos)->getOpcode() != WebAssembly::LOOP && 374 prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK && 375 prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP) 376 --InsertPos; 377 } 378 379 // Add the BLOCK. 380 BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)); 381 382 // Mark the end of the block. 383 InsertPos = MBB.begin(); 384 while (InsertPos != MBB.end() && 385 InsertPos->getOpcode() == WebAssembly::END_LOOP) 386 ++InsertPos; 387 BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::END_BLOCK)); 388 389 // Track the farthest-spanning scope that ends at this point. 390 int Number = MBB.getNumber(); 391 if (!ScopeTops[Number] || 392 ScopeTops[Number]->getNumber() > Header->getNumber()) 393 ScopeTops[Number] = Header; 394} 395 396/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header). 
397static void PlaceLoopMarker( 398 MachineBasicBlock &MBB, MachineFunction &MF, 399 SmallVectorImpl<MachineBasicBlock *> &ScopeTops, 400 DenseMap<const MachineInstr *, const MachineBasicBlock *> &LoopTops, 401 const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) { 402 MachineLoop *Loop = MLI.getLoopFor(&MBB); 403 if (!Loop || Loop->getHeader() != &MBB) 404 return; 405 406 // The operand of a LOOP is the first block after the loop. If the loop is the 407 // bottom of the function, insert a dummy block at the end. 408 MachineBasicBlock *Bottom = LoopBottom(Loop); 409 auto Iter = next(MachineFunction::iterator(Bottom)); 410 if (Iter == MF.end()) { 411 MachineBasicBlock *Label = MF.CreateMachineBasicBlock(); 412 // Give it a fake predecessor so that AsmPrinter prints its label. 413 Label->addSuccessor(Label); 414 MF.push_back(Label); 415 Iter = next(MachineFunction::iterator(Bottom)); 416 } 417 MachineBasicBlock *AfterLoop = &*Iter; 418 419 // Mark the beginning of the loop (after the end of any existing loop that 420 // ends here). 421 auto InsertPos = MBB.begin(); 422 while (InsertPos != MBB.end() && 423 InsertPos->getOpcode() == WebAssembly::END_LOOP) 424 ++InsertPos; 425 BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::LOOP)); 426 427 // Mark the end of the loop. 
428 MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), DebugLoc(), 429 TII.get(WebAssembly::END_LOOP)); 430 LoopTops[End] = &MBB; 431 432 assert((!ScopeTops[AfterLoop->getNumber()] || 433 ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) && 434 "With block sorting the outermost loop for a block should be first."); 435 if (!ScopeTops[AfterLoop->getNumber()]) 436 ScopeTops[AfterLoop->getNumber()] = &MBB; 437} 438 439static unsigned 440GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack, 441 const MachineBasicBlock *MBB) { 442 unsigned Depth = 0; 443 for (auto X : reverse(Stack)) { 444 if (X == MBB) 445 break; 446 ++Depth; 447 } 448 assert(Depth < Stack.size() && "Branch destination should be in scope"); 449 return Depth; 450} 451 452/// Insert LOOP and BLOCK markers at appropriate places. 453static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, 454 const WebAssemblyInstrInfo &TII, 455 MachineDominatorTree &MDT, 456 WebAssemblyFunctionInfo &MFI) { 457 // For each block whose label represents the end of a scope, record the block 458 // which holds the beginning of the scope. This will allow us to quickly skip 459 // over scoped regions when walking blocks. We allocate one more than the 460 // number of blocks in the function to accommodate for the possible fake block 461 // we may insert at the end. 462 SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1); 463 464 // For eacn LOOP_END, the corresponding LOOP. 465 DenseMap<const MachineInstr *, const MachineBasicBlock *> LoopTops; 466 467 for (auto &MBB : MF) { 468 // Place the LOOP for MBB if MBB is the header of a loop. 469 PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI); 470 471 // Place the BLOCK for MBB if MBB is branched to from above. 472 PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT, MFI); 473 } 474 475 // Now rewrite references to basic blocks to be depth immediates. 
476 SmallVector<const MachineBasicBlock *, 8> Stack; 477 for (auto &MBB : reverse(MF)) { 478 for (auto &MI : reverse(MBB)) { 479 switch (MI.getOpcode()) { 480 case WebAssembly::BLOCK: 481 assert(ScopeTops[Stack.back()->getNumber()] == &MBB && 482 "Block should be balanced"); 483 Stack.pop_back(); 484 break; 485 case WebAssembly::LOOP: 486 assert(Stack.back() == &MBB && "Loop top should be balanced"); 487 Stack.pop_back(); 488 Stack.pop_back(); 489 break; 490 case WebAssembly::END_BLOCK: 491 Stack.push_back(&MBB); 492 break; 493 case WebAssembly::END_LOOP: 494 Stack.push_back(&MBB); 495 Stack.push_back(LoopTops[&MI]); 496 break; 497 default: 498 if (MI.isTerminator()) { 499 // Rewrite MBB operands to be depth immediates. 500 SmallVector<MachineOperand, 4> Ops(MI.operands()); 501 while (MI.getNumOperands() > 0) 502 MI.RemoveOperand(MI.getNumOperands() - 1); 503 for (auto MO : Ops) { 504 if (MO.isMBB()) 505 MO = MachineOperand::CreateImm(GetDepth(Stack, MO.getMBB())); 506 MI.addOperand(MF, MO); 507 } 508 } 509 break; 510 } 511 } 512 } 513 assert(Stack.empty() && "Control flow should be balanced"); 514} 515 516bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { 517 DEBUG(dbgs() << "********** CFG Stackifying **********\n" 518 "********** Function: " 519 << MF.getName() << '\n'); 520 521 const auto &MLI = getAnalysis<MachineLoopInfo>(); 522 auto &MDT = getAnalysis<MachineDominatorTree>(); 523 // Liveness is not tracked for EXPR_STACK physreg. 524 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 525 WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); 526 MF.getRegInfo().invalidateLiveness(); 527 528 // Sort the blocks, with contiguous loops. 529 SortBlocks(MF, MLI, MDT); 530 531 // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. 532 PlaceMarkers(MF, MLI, TII, MDT, MFI); 533 534 return true; 535} 536