1//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a shrink wrapping variant of prolog/epilog insertion: 11// - Spills and restores of callee-saved registers (CSRs) are placed in the 12// machine CFG to tightly surround their uses so that execution paths that 13// do not use CSRs do not pay the spill/restore penalty. 14// 15// - Avoiding placment of spills/restores in loops: if a CSR is used inside a 16// loop the spills are placed in the loop preheader, and restores are 17// placed in the loop exit nodes (the successors of loop _exiting_ nodes). 18// 19// - Covering paths without CSR uses: 20// If a region in a CFG uses CSRs and has multiple entry and/or exit points, 21// the use info for the CSRs inside the region is propagated outward in the 22// CFG to ensure validity of the spill/restore placements. This decreases 23// the effectiveness of shrink wrapping but does not require edge splitting 24// in the machine CFG. 25// 26// This shrink wrapping implementation uses an iterative analysis to determine 27// which basic blocks require spills and restores for CSRs. 28// 29// This pass uses MachineDominators and MachineLoopInfo. Loop information 30// is used to prevent placement of callee-saved register spills/restores 31// in the bodies of loops. 32// 33//===----------------------------------------------------------------------===// 34 35#define DEBUG_TYPE "shrink-wrap" 36 37#include "PrologEpilogInserter.h" 38#include "llvm/CodeGen/MachineDominators.h" 39#include "llvm/CodeGen/MachineLoopInfo.h" 40#include "llvm/CodeGen/MachineInstr.h" 41#include "llvm/CodeGen/MachineFrameInfo.h" 42#include "llvm/CodeGen/MachineRegisterInfo.h" 43#include "llvm/Target/TargetMachine.h" 44#include "llvm/Target/TargetRegisterInfo.h" 45#include "llvm/ADT/SparseBitVector.h" 46#include "llvm/ADT/DenseMap.h" 47#include "llvm/ADT/PostOrderIterator.h" 48#include "llvm/ADT/Statistic.h" 49#include "llvm/Support/CommandLine.h" 50#include "llvm/Support/Compiler.h" 51#include "llvm/Support/Debug.h" 52#include "llvm/ADT/STLExtras.h" 53#include "llvm/ADT/Statistic.h" 54#include <sstream> 55 56using namespace llvm; 57 58STATISTIC(numSRReduced, "Number of CSR spills+restores reduced."); 59 60// Shrink Wrapping: 61static cl::opt<bool> 62ShrinkWrapping("shrink-wrap", 63 cl::desc("Shrink wrap callee-saved register spills/restores")); 64 65// Shrink wrap only the specified function, a debugging aid. 66static cl::opt<std::string> 67ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, 68 cl::desc("Shrink wrap the specified function"), 69 cl::value_desc("funcname"), 70 cl::init("")); 71 72// Debugging level for shrink wrapping. 73enum ShrinkWrapDebugLevel { 74 None, BasicInfo, Iterations, Details 75}; 76 77static cl::opt<enum ShrinkWrapDebugLevel> 78ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, 79 cl::desc("Print shrink wrapping debugging information"), 80 cl::values( 81 clEnumVal(None , "disable debug output"), 82 clEnumVal(BasicInfo , "print basic DF sets"), 83 clEnumVal(Iterations, "print SR sets for each iteration"), 84 clEnumVal(Details , "print all DF sets"), 85 clEnumValEnd)); 86 87 88void PEI::getAnalysisUsage(AnalysisUsage &AU) const { 89 AU.setPreservesCFG(); 90 if (ShrinkWrapping || ShrinkWrapFunc != "") { 91 AU.addRequired<MachineLoopInfo>(); 92 AU.addRequired<MachineDominatorTree>(); 93 } 94 AU.addPreserved<MachineLoopInfo>(); 95 AU.addPreserved<MachineDominatorTree>(); 96 AU.addRequired<TargetPassConfig>(); 97 MachineFunctionPass::getAnalysisUsage(AU); 98} 99 100//===----------------------------------------------------------------------===// 101// ShrinkWrapping implementation 102//===----------------------------------------------------------------------===// 103 104// Convienences for dealing with machine loops. 105MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) { 106 assert(LP && "Machine loop is NULL."); 107 MachineBasicBlock* PHDR = LP->getLoopPreheader(); 108 MachineLoop* PLP = LP->getParentLoop(); 109 while (PLP) { 110 PHDR = PLP->getLoopPreheader(); 111 PLP = PLP->getParentLoop(); 112 } 113 return PHDR; 114} 115 116MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { 117 if (LP == 0) 118 return 0; 119 MachineLoop* PLP = LP->getParentLoop(); 120 while (PLP) { 121 LP = PLP; 122 PLP = PLP->getParentLoop(); 123 } 124 return LP; 125} 126 127bool PEI::isReturnBlock(MachineBasicBlock* MBB) { 128 return (MBB && !MBB->empty() && MBB->back().isReturn()); 129} 130 131// Initialize shrink wrapping DFA sets, called before iterations. 132void PEI::clearAnticAvailSets() { 133 AnticIn.clear(); 134 AnticOut.clear(); 135 AvailIn.clear(); 136 AvailOut.clear(); 137} 138 139// Clear all sets constructed by shrink wrapping. 140void PEI::clearAllSets() { 141 ReturnBlocks.clear(); 142 clearAnticAvailSets(); 143 UsedCSRegs.clear(); 144 CSRUsed.clear(); 145 TLLoops.clear(); 146 CSRSave.clear(); 147 CSRRestore.clear(); 148} 149 150// Initialize all shrink wrapping data. 151void PEI::initShrinkWrappingInfo() { 152 clearAllSets(); 153 EntryBlock = 0; 154#ifndef NDEBUG 155 HasFastExitPath = false; 156#endif 157 ShrinkWrapThisFunction = ShrinkWrapping; 158 // DEBUG: enable or disable shrink wrapping for the current function 159 // via --shrink-wrap-func=<funcname>. 160#ifndef NDEBUG 161 if (ShrinkWrapFunc != "") { 162 std::string MFName = MF->getName().str(); 163 ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); 164 } 165#endif 166} 167 168 169/// placeCSRSpillsAndRestores - determine which MBBs of the function 170/// need save, restore code for callee-saved registers by doing a DF analysis 171/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs 172/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo 173/// is used to ensure that CSR save/restore code is not placed inside loops. 174/// This function computes the maps of MBBs -> CSRs to spill and restore 175/// in CSRSave, CSRRestore. 176/// 177/// If shrink wrapping is not being performed, place all spills in 178/// the entry block, all restores in return blocks. In this case, 179/// CSRSave has a single mapping, CSRRestore has mappings for each 180/// return block. 181/// 182void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { 183 184 DEBUG(MF = &Fn); 185 186 initShrinkWrappingInfo(); 187 188 DEBUG(if (ShrinkWrapThisFunction) { 189 dbgs() << "Place CSR spills/restores for " 190 << MF->getName() << "\n"; 191 }); 192 193 if (calculateSets(Fn)) 194 placeSpillsAndRestores(Fn); 195} 196 197/// calcAnticInOut - calculate the anticipated in/out reg sets 198/// for the given MBB by looking forward in the MCFG at MBB's 199/// successors. 200/// 201bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { 202 bool changed = false; 203 204 // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) 205 SmallVector<MachineBasicBlock*, 4> successors; 206 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 207 SE = MBB->succ_end(); SI != SE; ++SI) { 208 MachineBasicBlock* SUCC = *SI; 209 if (SUCC != MBB) 210 successors.push_back(SUCC); 211 } 212 213 unsigned i = 0, e = successors.size(); 214 if (i != e) { 215 CSRegSet prevAnticOut = AnticOut[MBB]; 216 MachineBasicBlock* SUCC = successors[i]; 217 218 AnticOut[MBB] = AnticIn[SUCC]; 219 for (++i; i != e; ++i) { 220 SUCC = successors[i]; 221 AnticOut[MBB] &= AnticIn[SUCC]; 222 } 223 if (prevAnticOut != AnticOut[MBB]) 224 changed = true; 225 } 226 227 // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); 228 CSRegSet prevAnticIn = AnticIn[MBB]; 229 AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; 230 if (prevAnticIn != AnticIn[MBB]) 231 changed = true; 232 return changed; 233} 234 235/// calcAvailInOut - calculate the available in/out reg sets 236/// for the given MBB by looking backward in the MCFG at MBB's 237/// predecessors. 238/// 239bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { 240 bool changed = false; 241 242 // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) 243 SmallVector<MachineBasicBlock*, 4> predecessors; 244 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 245 PE = MBB->pred_end(); PI != PE; ++PI) { 246 MachineBasicBlock* PRED = *PI; 247 if (PRED != MBB) 248 predecessors.push_back(PRED); 249 } 250 251 unsigned i = 0, e = predecessors.size(); 252 if (i != e) { 253 CSRegSet prevAvailIn = AvailIn[MBB]; 254 MachineBasicBlock* PRED = predecessors[i]; 255 256 AvailIn[MBB] = AvailOut[PRED]; 257 for (++i; i != e; ++i) { 258 PRED = predecessors[i]; 259 AvailIn[MBB] &= AvailOut[PRED]; 260 } 261 if (prevAvailIn != AvailIn[MBB]) 262 changed = true; 263 } 264 265 // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); 266 CSRegSet prevAvailOut = AvailOut[MBB]; 267 AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; 268 if (prevAvailOut != AvailOut[MBB]) 269 changed = true; 270 return changed; 271} 272 273/// calculateAnticAvail - build the sets anticipated and available 274/// registers in the MCFG of the current function iteratively, 275/// doing a combined forward and backward analysis. 276/// 277void PEI::calculateAnticAvail(MachineFunction &Fn) { 278 // Initialize data flow sets. 279 clearAnticAvailSets(); 280 281 // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. 282 bool changed = true; 283 unsigned iterations = 0; 284 while (changed) { 285 changed = false; 286 ++iterations; 287 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 288 MBBI != MBBE; ++MBBI) { 289 MachineBasicBlock* MBB = MBBI; 290 291 // Calculate anticipated in, out regs at MBB from 292 // anticipated at successors of MBB. 293 changed |= calcAnticInOut(MBB); 294 295 // Calculate available in, out regs at MBB from 296 // available at predecessors of MBB. 297 changed |= calcAvailInOut(MBB); 298 } 299 } 300 301 DEBUG({ 302 if (ShrinkWrapDebugging >= Details) { 303 dbgs() 304 << "-----------------------------------------------------------\n" 305 << " Antic/Avail Sets:\n" 306 << "-----------------------------------------------------------\n" 307 << "iterations = " << iterations << "\n" 308 << "-----------------------------------------------------------\n" 309 << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" 310 << "-----------------------------------------------------------\n"; 311 312 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 313 MBBI != MBBE; ++MBBI) { 314 MachineBasicBlock* MBB = MBBI; 315 dumpSets(MBB); 316 } 317 318 dbgs() 319 << "-----------------------------------------------------------\n"; 320 } 321 }); 322} 323 324/// propagateUsesAroundLoop - copy used register info from MBB to all blocks 325/// of the loop given by LP and its parent loops. This prevents spills/restores 326/// from being placed in the bodies of loops. 327/// 328void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { 329 if (! MBB || !LP) 330 return; 331 332 std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); 333 for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { 334 MachineBasicBlock* LBB = loopBlocks[i]; 335 if (LBB == MBB) 336 continue; 337 if (CSRUsed[LBB].contains(CSRUsed[MBB])) 338 continue; 339 CSRUsed[LBB] |= CSRUsed[MBB]; 340 } 341} 342 343/// calculateSets - collect the CSRs used in this function, compute 344/// the DF sets that describe the initial minimal regions in the 345/// Machine CFG around which CSR spills and restores must be placed. 346/// 347/// Additionally, this function decides if shrink wrapping should 348/// be disabled for the current function, checking the following: 349/// 1. the current function has more than 500 MBBs: heuristic limit 350/// on function size to reduce compile time impact of the current 351/// iterative algorithm. 352/// 2. all CSRs are used in the entry block. 353/// 3. all CSRs are used in all immediate successors of the entry block. 354/// 4. all CSRs are used in a subset of blocks, each of which dominates 355/// all return blocks. These blocks, taken as a subgraph of the MCFG, 356/// are equivalent to the entry block since all execution paths pass 357/// through them. 358/// 359bool PEI::calculateSets(MachineFunction &Fn) { 360 // Sets used to compute spill, restore placement sets. 361 const std::vector<CalleeSavedInfo> CSI = 362 Fn.getFrameInfo()->getCalleeSavedInfo(); 363 364 // If no CSRs used, we are done. 365 if (CSI.empty()) { 366 DEBUG(if (ShrinkWrapThisFunction) 367 dbgs() << "DISABLED: " << Fn.getName() 368 << ": uses no callee-saved registers\n"); 369 return false; 370 } 371 372 // Save refs to entry and return blocks. 373 EntryBlock = Fn.begin(); 374 for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); 375 MBB != E; ++MBB) 376 if (isReturnBlock(MBB)) 377 ReturnBlocks.push_back(MBB); 378 379 // Determine if this function has fast exit paths. 380 DEBUG(if (ShrinkWrapThisFunction) 381 findFastExitPath()); 382 383 // Limit shrink wrapping via the current iterative bit vector 384 // implementation to functions with <= 500 MBBs. 385 if (Fn.size() > 500) { 386 DEBUG(if (ShrinkWrapThisFunction) 387 dbgs() << "DISABLED: " << Fn.getName() 388 << ": too large (" << Fn.size() << " MBBs)\n"); 389 ShrinkWrapThisFunction = false; 390 } 391 392 // Return now if not shrink wrapping. 393 if (! ShrinkWrapThisFunction) 394 return false; 395 396 // Collect set of used CSRs. 397 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { 398 UsedCSRegs.set(inx); 399 } 400 401 // Walk instructions in all MBBs, create CSRUsed[] sets, choose 402 // whether or not to shrink wrap this function. 403 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); 404 MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); 405 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); 406 407 bool allCSRUsesInEntryBlock = true; 408 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 409 MBBI != MBBE; ++MBBI) { 410 MachineBasicBlock* MBB = MBBI; 411 for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { 412 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { 413 unsigned Reg = CSI[inx].getReg(); 414 // If instruction I reads or modifies Reg, add it to UsedCSRegs, 415 // CSRUsed map for the current block. 416 for (unsigned opInx = 0, opEnd = I->getNumOperands(); 417 opInx != opEnd; ++opInx) { 418 const MachineOperand &MO = I->getOperand(opInx); 419 if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) 420 continue; 421 unsigned MOReg = MO.getReg(); 422 if (!MOReg) 423 continue; 424 if (MOReg == Reg || 425 (TargetRegisterInfo::isPhysicalRegister(MOReg) && 426 TargetRegisterInfo::isPhysicalRegister(Reg) && 427 TRI->isSubRegister(Reg, MOReg))) { 428 // CSR Reg is defined/used in block MBB. 429 CSRUsed[MBB].set(inx); 430 // Check for uses in EntryBlock. 431 if (MBB != EntryBlock) 432 allCSRUsesInEntryBlock = false; 433 } 434 } 435 } 436 } 437 438 if (CSRUsed[MBB].empty()) 439 continue; 440 441 // Propagate CSRUsed[MBB] in loops 442 if (MachineLoop* LP = LI.getLoopFor(MBB)) { 443 // Add top level loop to work list. 444 MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); 445 MachineLoop* PLP = getTopLevelLoopParent(LP); 446 447 if (! HDR) { 448 HDR = PLP->getHeader(); 449 assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); 450 MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); 451 HDR = *PI; 452 } 453 TLLoops[HDR] = PLP; 454 455 // Push uses from inside loop to its parent loops, 456 // or to all other MBBs in its loop. 457 if (LP->getLoopDepth() > 1) { 458 for (MachineLoop* PLP = LP->getParentLoop(); PLP; 459 PLP = PLP->getParentLoop()) { 460 propagateUsesAroundLoop(MBB, PLP); 461 } 462 } else { 463 propagateUsesAroundLoop(MBB, LP); 464 } 465 } 466 } 467 468 if (allCSRUsesInEntryBlock) { 469 DEBUG(dbgs() << "DISABLED: " << Fn.getName() 470 << ": all CSRs used in EntryBlock\n"); 471 ShrinkWrapThisFunction = false; 472 } else { 473 bool allCSRsUsedInEntryFanout = true; 474 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), 475 SE = EntryBlock->succ_end(); SI != SE; ++SI) { 476 MachineBasicBlock* SUCC = *SI; 477 if (CSRUsed[SUCC] != UsedCSRegs) 478 allCSRsUsedInEntryFanout = false; 479 } 480 if (allCSRsUsedInEntryFanout) { 481 DEBUG(dbgs() << "DISABLED: " << Fn.getName() 482 << ": all CSRs used in imm successors of EntryBlock\n"); 483 ShrinkWrapThisFunction = false; 484 } 485 } 486 487 if (ShrinkWrapThisFunction) { 488 // Check if MBB uses CSRs and dominates all exit nodes. 489 // Such nodes are equiv. to the entry node w.r.t. 490 // CSR uses: every path through the function must 491 // pass through this node. If each CSR is used at least 492 // once by these nodes, shrink wrapping is disabled. 493 CSRegSet CSRUsedInChokePoints; 494 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 495 MBBI != MBBE; ++MBBI) { 496 MachineBasicBlock* MBB = MBBI; 497 if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) 498 continue; 499 bool dominatesExitNodes = true; 500 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) 501 if (! DT.dominates(MBB, ReturnBlocks[ri])) { 502 dominatesExitNodes = false; 503 break; 504 } 505 if (dominatesExitNodes) { 506 CSRUsedInChokePoints |= CSRUsed[MBB]; 507 if (CSRUsedInChokePoints == UsedCSRegs) { 508 DEBUG(dbgs() << "DISABLED: " << Fn.getName() 509 << ": all CSRs used in choke point(s) at " 510 << getBasicBlockName(MBB) << "\n"); 511 ShrinkWrapThisFunction = false; 512 break; 513 } 514 } 515 } 516 } 517 518 // Return now if we have decided not to apply shrink wrapping 519 // to the current function. 520 if (! ShrinkWrapThisFunction) 521 return false; 522 523 DEBUG({ 524 dbgs() << "ENABLED: " << Fn.getName(); 525 if (HasFastExitPath) 526 dbgs() << " (fast exit path)"; 527 dbgs() << "\n"; 528 if (ShrinkWrapDebugging >= BasicInfo) { 529 dbgs() << "------------------------------" 530 << "-----------------------------\n"; 531 dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; 532 if (ShrinkWrapDebugging >= Details) { 533 dbgs() << "------------------------------" 534 << "-----------------------------\n"; 535 dumpAllUsed(); 536 } 537 } 538 }); 539 540 // Build initial DF sets to determine minimal regions in the 541 // Machine CFG around which CSRs must be spilled and restored. 542 calculateAnticAvail(Fn); 543 544 return true; 545} 546 547/// addUsesForMEMERegion - add uses of CSRs spilled or restored in 548/// multi-entry, multi-exit (MEME) regions so spill and restore 549/// placement will not break code that enters or leaves a 550/// shrink-wrapped region by inducing spills with no matching 551/// restores or restores with no matching spills. A MEME region 552/// is a subgraph of the MCFG with multiple entry edges, multiple 553/// exit edges, or both. This code propagates use information 554/// through the MCFG until all paths requiring spills and restores 555/// _outside_ the computed minimal placement regions have been covered. 556/// 557bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, 558 SmallVector<MachineBasicBlock*, 4>& blks) { 559 if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { 560 bool processThisBlock = false; 561 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 562 SE = MBB->succ_end(); SI != SE; ++SI) { 563 MachineBasicBlock* SUCC = *SI; 564 if (SUCC->pred_size() > 1) { 565 processThisBlock = true; 566 break; 567 } 568 } 569 if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { 570 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 571 PE = MBB->pred_end(); PI != PE; ++PI) { 572 MachineBasicBlock* PRED = *PI; 573 if (PRED->succ_size() > 1) { 574 processThisBlock = true; 575 break; 576 } 577 } 578 } 579 if (! processThisBlock) 580 return false; 581 } 582 583 CSRegSet prop; 584 if (!CSRSave[MBB].empty()) 585 prop = CSRSave[MBB]; 586 else if (!CSRRestore[MBB].empty()) 587 prop = CSRRestore[MBB]; 588 else 589 prop = CSRUsed[MBB]; 590 if (prop.empty()) 591 return false; 592 593 // Propagate selected bits to successors, predecessors of MBB. 594 bool addedUses = false; 595 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 596 SE = MBB->succ_end(); SI != SE; ++SI) { 597 MachineBasicBlock* SUCC = *SI; 598 // Self-loop 599 if (SUCC == MBB) 600 continue; 601 if (! CSRUsed[SUCC].contains(prop)) { 602 CSRUsed[SUCC] |= prop; 603 addedUses = true; 604 blks.push_back(SUCC); 605 DEBUG(if (ShrinkWrapDebugging >= Iterations) 606 dbgs() << getBasicBlockName(MBB) 607 << "(" << stringifyCSRegSet(prop) << ")->" 608 << "successor " << getBasicBlockName(SUCC) << "\n"); 609 } 610 } 611 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 612 PE = MBB->pred_end(); PI != PE; ++PI) { 613 MachineBasicBlock* PRED = *PI; 614 // Self-loop 615 if (PRED == MBB) 616 continue; 617 if (! CSRUsed[PRED].contains(prop)) { 618 CSRUsed[PRED] |= prop; 619 addedUses = true; 620 blks.push_back(PRED); 621 DEBUG(if (ShrinkWrapDebugging >= Iterations) 622 dbgs() << getBasicBlockName(MBB) 623 << "(" << stringifyCSRegSet(prop) << ")->" 624 << "predecessor " << getBasicBlockName(PRED) << "\n"); 625 } 626 } 627 return addedUses; 628} 629 630/// addUsesForTopLevelLoops - add uses for CSRs used inside top 631/// level loops to the exit blocks of those loops. 632/// 633bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { 634 bool addedUses = false; 635 636 // Place restores for top level loops where needed. 637 for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator 638 I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { 639 MachineBasicBlock* MBB = I->first; 640 MachineLoop* LP = I->second; 641 MachineBasicBlock* HDR = LP->getHeader(); 642 SmallVector<MachineBasicBlock*, 4> exitBlocks; 643 CSRegSet loopSpills; 644 645 loopSpills = CSRSave[MBB]; 646 if (CSRSave[MBB].empty()) { 647 loopSpills = CSRUsed[HDR]; 648 assert(!loopSpills.empty() && "No CSRs used in loop?"); 649 } else if (CSRRestore[MBB].contains(CSRSave[MBB])) 650 continue; 651 652 LP->getExitBlocks(exitBlocks); 653 assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); 654 for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { 655 MachineBasicBlock* EXB = exitBlocks[i]; 656 if (! CSRUsed[EXB].contains(loopSpills)) { 657 CSRUsed[EXB] |= loopSpills; 658 addedUses = true; 659 DEBUG(if (ShrinkWrapDebugging >= Iterations) 660 dbgs() << "LOOP " << getBasicBlockName(MBB) 661 << "(" << stringifyCSRegSet(loopSpills) << ")->" 662 << getBasicBlockName(EXB) << "\n"); 663 if (EXB->succ_size() > 1 || EXB->pred_size() > 1) 664 blks.push_back(EXB); 665 } 666 } 667 } 668 return addedUses; 669} 670 671/// calcSpillPlacements - determine which CSRs should be spilled 672/// in MBB using AnticIn sets of MBB's predecessors, keeping track 673/// of changes to spilled reg sets. Add MBB to the set of blocks 674/// that need to be processed for propagating use info to cover 675/// multi-entry/exit regions. 676/// 677bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, 678 SmallVector<MachineBasicBlock*, 4> &blks, 679 CSRegBlockMap &prevSpills) { 680 bool placedSpills = false; 681 // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) 682 CSRegSet anticInPreds; 683 SmallVector<MachineBasicBlock*, 4> predecessors; 684 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 685 PE = MBB->pred_end(); PI != PE; ++PI) { 686 MachineBasicBlock* PRED = *PI; 687 if (PRED != MBB) 688 predecessors.push_back(PRED); 689 } 690 unsigned i = 0, e = predecessors.size(); 691 if (i != e) { 692 MachineBasicBlock* PRED = predecessors[i]; 693 anticInPreds = UsedCSRegs - AnticIn[PRED]; 694 for (++i; i != e; ++i) { 695 PRED = predecessors[i]; 696 anticInPreds &= (UsedCSRegs - AnticIn[PRED]); 697 } 698 } else { 699 // Handle uses in entry blocks (which have no predecessors). 700 // This is necessary because the DFA formulation assumes the 701 // entry and (multiple) exit nodes cannot have CSR uses, which 702 // is not the case in the real world. 703 anticInPreds = UsedCSRegs; 704 } 705 // Compute spills required at MBB: 706 CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; 707 708 if (! CSRSave[MBB].empty()) { 709 if (MBB == EntryBlock) { 710 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) 711 CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; 712 } else { 713 // Reset all regs spilled in MBB that are also spilled in EntryBlock. 714 if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { 715 CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; 716 } 717 } 718 } 719 placedSpills = (CSRSave[MBB] != prevSpills[MBB]); 720 prevSpills[MBB] = CSRSave[MBB]; 721 // Remember this block for adding restores to successor 722 // blocks for multi-entry region. 723 if (placedSpills) 724 blks.push_back(MBB); 725 726 DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) 727 dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " 728 << stringifyCSRegSet(CSRSave[MBB]) << "\n"); 729 730 return placedSpills; 731} 732 733/// calcRestorePlacements - determine which CSRs should be restored 734/// in MBB using AvailOut sets of MBB's succcessors, keeping track 735/// of changes to restored reg sets. Add MBB to the set of blocks 736/// that need to be processed for propagating use info to cover 737/// multi-entry/exit regions. 738/// 739bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, 740 SmallVector<MachineBasicBlock*, 4> &blks, 741 CSRegBlockMap &prevRestores) { 742 bool placedRestores = false; 743 // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) 744 CSRegSet availOutSucc; 745 SmallVector<MachineBasicBlock*, 4> successors; 746 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 747 SE = MBB->succ_end(); SI != SE; ++SI) { 748 MachineBasicBlock* SUCC = *SI; 749 if (SUCC != MBB) 750 successors.push_back(SUCC); 751 } 752 unsigned i = 0, e = successors.size(); 753 if (i != e) { 754 MachineBasicBlock* SUCC = successors[i]; 755 availOutSucc = UsedCSRegs - AvailOut[SUCC]; 756 for (++i; i != e; ++i) { 757 SUCC = successors[i]; 758 availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); 759 } 760 } else { 761 if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { 762 // Handle uses in return blocks (which have no successors). 763 // This is necessary because the DFA formulation assumes the 764 // entry and (multiple) exit nodes cannot have CSR uses, which 765 // is not the case in the real world. 766 availOutSucc = UsedCSRegs; 767 } 768 } 769 // Compute restores required at MBB: 770 CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; 771 772 // Postprocess restore placements at MBB. 773 // Remove the CSRs that are restored in the return blocks. 774 // Lest this be confusing, note that: 775 // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. 776 if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { 777 if (! CSRSave[EntryBlock].empty()) 778 CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; 779 } 780 placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); 781 prevRestores[MBB] = CSRRestore[MBB]; 782 // Remember this block for adding saves to predecessor 783 // blocks for multi-entry region. 784 if (placedRestores) 785 blks.push_back(MBB); 786 787 DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) 788 dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " 789 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); 790 791 return placedRestores; 792} 793 794/// placeSpillsAndRestores - place spills and restores of CSRs 795/// used in MBBs in minimal regions that contain the uses. 796/// 797void PEI::placeSpillsAndRestores(MachineFunction &Fn) { 798 CSRegBlockMap prevCSRSave; 799 CSRegBlockMap prevCSRRestore; 800 SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks; 801 bool changed = true; 802 unsigned iterations = 0; 803 804 // Iterate computation of spill and restore placements in the MCFG until: 805 // 1. CSR use info has been fully propagated around the MCFG, and 806 // 2. computation of CSRSave[], CSRRestore[] reach fixed points. 807 while (changed) { 808 changed = false; 809 ++iterations; 810 811 DEBUG(if (ShrinkWrapDebugging >= Iterations) 812 dbgs() << "iter " << iterations 813 << " --------------------------------------------------\n"); 814 815 // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, 816 // which determines the placements of spills and restores. 817 // Keep track of changes to spills, restores in each iteration to 818 // minimize the total iterations. 819 bool SRChanged = false; 820 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 821 MBBI != MBBE; ++MBBI) { 822 MachineBasicBlock* MBB = MBBI; 823 824 // Place spills for CSRs in MBB. 825 SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); 826 827 // Place restores for CSRs in MBB. 828 SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); 829 } 830 831 // Add uses of CSRs used inside loops where needed. 832 changed |= addUsesForTopLevelLoops(cvBlocks); 833 834 // Add uses for CSRs spilled or restored at branch, join points. 835 if (changed || SRChanged) { 836 while (! cvBlocks.empty()) { 837 MachineBasicBlock* MBB = cvBlocks.pop_back_val(); 838 changed |= addUsesForMEMERegion(MBB, ncvBlocks); 839 } 840 if (! ncvBlocks.empty()) { 841 cvBlocks = ncvBlocks; 842 ncvBlocks.clear(); 843 } 844 } 845 846 if (changed) { 847 calculateAnticAvail(Fn); 848 CSRSave.clear(); 849 CSRRestore.clear(); 850 } 851 } 852 853 // Check for effectiveness: 854 // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} 855 // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] 856 // Gives a measure of how many CSR spills have been moved from EntryBlock 857 // to minimal regions enclosing their uses. 858 CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); 859 unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); 860 numSRReduced += numSRReducedThisFunc; 861 DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { 862 dbgs() << "-----------------------------------------------------------\n"; 863 dbgs() << "total iterations = " << iterations << " ( " 864 << Fn.getName() 865 << " " << numSRReducedThisFunc 866 << " " << Fn.size() 867 << " )\n"; 868 dbgs() << "-----------------------------------------------------------\n"; 869 dumpSRSets(); 870 dbgs() << "-----------------------------------------------------------\n"; 871 if (numSRReducedThisFunc) 872 verifySpillRestorePlacement(); 873 }); 874} 875 876// Debugging methods. 877#ifndef NDEBUG 878/// findFastExitPath - debugging method used to detect functions 879/// with at least one path from the entry block to a return block 880/// directly or which has a very small number of edges. 881/// 882void PEI::findFastExitPath() { 883 if (! EntryBlock) 884 return; 885 // Fina a path from EntryBlock to any return block that does not branch: 886 // Entry 887 // | ... 888 // v | 889 // B1<-----+ 890 // | 891 // v 892 // Return 893 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), 894 SE = EntryBlock->succ_end(); SI != SE; ++SI) { 895 MachineBasicBlock* SUCC = *SI; 896 897 // Assume positive, disprove existence of fast path. 898 HasFastExitPath = true; 899 900 // Check the immediate successors. 901 if (isReturnBlock(SUCC)) { 902 if (ShrinkWrapDebugging >= BasicInfo) 903 dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) 904 << "->" << getBasicBlockName(SUCC) << "\n"; 905 break; 906 } 907 // Traverse df from SUCC, look for a branch block. 908 std::string exitPath = getBasicBlockName(SUCC); 909 for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC), 910 BE = df_end(SUCC); BI != BE; ++BI) { 911 MachineBasicBlock* SBB = *BI; 912 // Reject paths with branch nodes. 913 if (SBB->succ_size() > 1) { 914 HasFastExitPath = false; 915 break; 916 } 917 exitPath += "->" + getBasicBlockName(SBB); 918 } 919 if (HasFastExitPath) { 920 if (ShrinkWrapDebugging >= BasicInfo) 921 dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) 922 << "->" << exitPath << "\n"; 923 break; 924 } 925 } 926} 927 928/// verifySpillRestorePlacement - check the current spill/restore 929/// sets for safety. Attempt to find spills without restores or 930/// restores without spills. 931/// Spills: walk df from each MBB in spill set ensuring that 932/// all CSRs spilled at MMBB are restored on all paths 933/// from MBB to all exit blocks. 934/// Restores: walk idf from each MBB in restore set ensuring that 935/// all CSRs restored at MBB are spilled on all paths 936/// reaching MBB. 937/// 938void PEI::verifySpillRestorePlacement() { 939 unsigned numReturnBlocks = 0; 940 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 941 MBBI != MBBE; ++MBBI) { 942 MachineBasicBlock* MBB = MBBI; 943 if (isReturnBlock(MBB) || MBB->succ_size() == 0) 944 ++numReturnBlocks; 945 } 946 for (CSRegBlockMap::iterator BI = CSRSave.begin(), 947 BE = CSRSave.end(); BI != BE; ++BI) { 948 MachineBasicBlock* MBB = BI->first; 949 CSRegSet spilled = BI->second; 950 CSRegSet restored; 951 952 if (spilled.empty()) 953 continue; 954 955 DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " 956 << stringifyCSRegSet(spilled) 957 << " RESTORE[" << getBasicBlockName(MBB) << "] = " 958 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); 959 960 if (CSRRestore[MBB].intersects(spilled)) { 961 restored |= (CSRRestore[MBB] & spilled); 962 } 963 964 // Walk depth first from MBB to find restores of all CSRs spilled at MBB: 965 // we must find restores for all spills w/no intervening spills on all 966 // paths from MBB to all return blocks. 967 for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB), 968 BE = df_end(MBB); BI != BE; ++BI) { 969 MachineBasicBlock* SBB = *BI; 970 if (SBB == MBB) 971 continue; 972 // Stop when we encounter spills of any CSRs spilled at MBB that 973 // have not yet been seen to be restored. 974 if (CSRSave[SBB].intersects(spilled) && 975 !restored.contains(CSRSave[SBB] & spilled)) 976 break; 977 // Collect the CSRs spilled at MBB that are restored 978 // at this DF successor of MBB. 979 if (CSRRestore[SBB].intersects(spilled)) 980 restored |= (CSRRestore[SBB] & spilled); 981 // If we are at a retun block, check that the restores 982 // we have seen so far exhaust the spills at MBB, then 983 // reset the restores. 984 if (isReturnBlock(SBB) || SBB->succ_size() == 0) { 985 if (restored != spilled) { 986 CSRegSet notRestored = (spilled - restored); 987 DEBUG(dbgs() << MF->getName() << ": " 988 << stringifyCSRegSet(notRestored) 989 << " spilled at " << getBasicBlockName(MBB) 990 << " are never restored on path to return " 991 << getBasicBlockName(SBB) << "\n"); 992 } 993 restored.clear(); 994 } 995 } 996 } 997 998 // Check restore placements. 999 for (CSRegBlockMap::iterator BI = CSRRestore.begin(), 1000 BE = CSRRestore.end(); BI != BE; ++BI) { 1001 MachineBasicBlock* MBB = BI->first; 1002 CSRegSet restored = BI->second; 1003 CSRegSet spilled; 1004 1005 if (restored.empty()) 1006 continue; 1007 1008 DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " 1009 << stringifyCSRegSet(CSRSave[MBB]) 1010 << " RESTORE[" << getBasicBlockName(MBB) << "] = " 1011 << stringifyCSRegSet(restored) << "\n"); 1012 1013 if (CSRSave[MBB].intersects(restored)) { 1014 spilled |= (CSRSave[MBB] & restored); 1015 } 1016 // Walk inverse depth first from MBB to find spills of all 1017 // CSRs restored at MBB: 1018 for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB), 1019 BE = idf_end(MBB); BI != BE; ++BI) { 1020 MachineBasicBlock* PBB = *BI; 1021 if (PBB == MBB) 1022 continue; 1023 // Stop when we encounter restores of any CSRs restored at MBB that 1024 // have not yet been seen to be spilled. 1025 if (CSRRestore[PBB].intersects(restored) && 1026 !spilled.contains(CSRRestore[PBB] & restored)) 1027 break; 1028 // Collect the CSRs restored at MBB that are spilled 1029 // at this DF predecessor of MBB. 1030 if (CSRSave[PBB].intersects(restored)) 1031 spilled |= (CSRSave[PBB] & restored); 1032 } 1033 if (spilled != restored) { 1034 CSRegSet notSpilled = (restored - spilled); 1035 DEBUG(dbgs() << MF->getName() << ": " 1036 << stringifyCSRegSet(notSpilled) 1037 << " restored at " << getBasicBlockName(MBB) 1038 << " are never spilled\n"); 1039 } 1040 } 1041} 1042 1043// Debugging print methods. 1044std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { 1045 if (!MBB) 1046 return ""; 1047 1048 if (MBB->getBasicBlock()) 1049 return MBB->getBasicBlock()->getName().str(); 1050 1051 std::ostringstream name; 1052 name << "_MBB_" << MBB->getNumber(); 1053 return name.str(); 1054} 1055 1056std::string PEI::stringifyCSRegSet(const CSRegSet& s) { 1057 const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); 1058 const std::vector<CalleeSavedInfo> CSI = 1059 MF->getFrameInfo()->getCalleeSavedInfo(); 1060 1061 std::ostringstream srep; 1062 if (CSI.size() == 0) { 1063 srep << "[]"; 1064 return srep.str(); 1065 } 1066 srep << "["; 1067 CSRegSet::iterator I = s.begin(), E = s.end(); 1068 if (I != E) { 1069 unsigned reg = CSI[*I].getReg(); 1070 srep << TRI->getName(reg); 1071 for (++I; I != E; ++I) { 1072 reg = CSI[*I].getReg(); 1073 srep << ","; 1074 srep << TRI->getName(reg); 1075 } 1076 } 1077 srep << "]"; 1078 return srep.str(); 1079} 1080 1081void PEI::dumpSet(const CSRegSet& s) { 1082 DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); 1083} 1084 1085void PEI::dumpUsed(MachineBasicBlock* MBB) { 1086 DEBUG({ 1087 if (MBB) 1088 dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " 1089 << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; 1090 }); 1091} 1092 1093void PEI::dumpAllUsed() { 1094 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 1095 MBBI != MBBE; ++MBBI) { 1096 MachineBasicBlock* MBB = MBBI; 1097 dumpUsed(MBB); 1098 } 1099} 1100 1101void PEI::dumpSets(MachineBasicBlock* MBB) { 1102 DEBUG({ 1103 if (MBB) 1104 dbgs() << getBasicBlockName(MBB) << " | " 1105 << stringifyCSRegSet(CSRUsed[MBB]) << " | " 1106 << stringifyCSRegSet(AnticIn[MBB]) << " | " 1107 << stringifyCSRegSet(AnticOut[MBB]) << " | " 1108 << stringifyCSRegSet(AvailIn[MBB]) << " | " 1109 << stringifyCSRegSet(AvailOut[MBB]) << "\n"; 1110 }); 1111} 1112 1113void PEI::dumpSets1(MachineBasicBlock* MBB) { 1114 DEBUG({ 1115 if (MBB) 1116 dbgs() << getBasicBlockName(MBB) << " | " 1117 << stringifyCSRegSet(CSRUsed[MBB]) << " | " 1118 << stringifyCSRegSet(AnticIn[MBB]) << " | " 1119 << stringifyCSRegSet(AnticOut[MBB]) << " | " 1120 << stringifyCSRegSet(AvailIn[MBB]) << " | " 1121 << stringifyCSRegSet(AvailOut[MBB]) << " | " 1122 << stringifyCSRegSet(CSRSave[MBB]) << " | " 1123 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; 1124 }); 1125} 1126 1127void PEI::dumpAllSets() { 1128 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 1129 MBBI != MBBE; ++MBBI) { 1130 MachineBasicBlock* MBB = MBBI; 1131 dumpSets1(MBB); 1132 } 1133} 1134 1135void PEI::dumpSRSets() { 1136 DEBUG({ 1137 for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); 1138 MBB != E; ++MBB) { 1139 if (!CSRSave[MBB].empty()) { 1140 dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " 1141 << stringifyCSRegSet(CSRSave[MBB]); 1142 if (CSRRestore[MBB].empty()) 1143 dbgs() << '\n'; 1144 } 1145 1146 if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) 1147 dbgs() << " " 1148 << "RESTORE[" << getBasicBlockName(MBB) << "] = " 1149 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; 1150 } 1151 }); 1152} 1153#endif 1154