PrologEpilogInserter.cpp revision 714cea3af986b10a529df5d8f370530b25cbaaab
1//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass is responsible for finalizing the functions frame layout, saving 11// callee saved registers, and for emitting prolog & epilog code for the 12// function. 13// 14// This pass must be run after register allocation. After this pass is 15// executed, it is illegal to construct MO_FrameIndex operands. 16// 17// This pass implements a shrink wrapping variant of prolog/epilog insertion: 18// - Places callee saved register (CSR) spills and restores in the CFG to 19// tightly surround uses so that execution paths that do not use CSRs do not 20// pay the spill/restore penalty. 21// 22// - Avoiding placment of spills/restores in loops: if a CSR is used inside a 23// loop(nest), the spills are placed in the loop preheader, and restores are 24// placed in the loop exit nodes (the successors of the loop _exiting_ nodes). 25// 26// - Covering paths without CSR uses: e.g. if a restore is placed in a join 27// block, a matching spill is added to the end of all immediate predecessor 28// blocks that are not reached by a spill. Similarly for saves placed in 29// branch blocks. 30// 31// Shrink wrapping uses an analysis similar to the one in GVNPRE to determine 32// which basic blocks require callee-saved register save/restore code. 33// 34// This pass uses MachineDominators and MachineLoopInfo. Loop information 35// is used to prevent shrink wrapping of callee-saved register save/restore 36// code into loops. 37// 38//===----------------------------------------------------------------------===// 39 40#define DEBUG_TYPE "shrink-wrap" 41 42#include "llvm/CodeGen/Passes.h" 43#include "llvm/CodeGen/MachineDominators.h" 44#include "llvm/CodeGen/MachineLoopInfo.h" 45#include "llvm/CodeGen/MachineFunctionPass.h" 46#include "llvm/CodeGen/MachineInstr.h" 47#include "llvm/CodeGen/MachineFrameInfo.h" 48#include "llvm/CodeGen/MachineModuleInfo.h" 49#include "llvm/CodeGen/MachineRegisterInfo.h" 50#include "llvm/CodeGen/RegisterScavenging.h" 51#include "llvm/Target/TargetMachine.h" 52#include "llvm/Target/TargetRegisterInfo.h" 53#include "llvm/Target/TargetFrameInfo.h" 54#include "llvm/Target/TargetInstrInfo.h" 55#include "llvm/ADT/SparseBitVector.h" 56#include "llvm/ADT/DenseMap.h" 57#include "llvm/ADT/PostOrderIterator.h" 58#include "llvm/ADT/Statistic.h" 59#include "llvm/Support/CommandLine.h" 60#include "llvm/Support/Compiler.h" 61#include "llvm/Support/Debug.h" 62#include "llvm/ADT/STLExtras.h" 63#include "llvm/ADT/Statistic.h" 64#include <climits> 65#include <sstream> 66 67using namespace llvm; 68 69STATISTIC(numSRReduced, "Number of CSR spills+restores reduced."); 70 71// Shrink Wrapping: 72static cl::opt<bool> 73ShrinkWrapping("shrink-wrap", 74 cl::desc("Shrink wrap callee-saved register spills/restores")); 75 76// Shrink wrap only the specified function, a debugging aid. 77static cl::opt<std::string> 78ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, 79 cl::desc("Shrink wrap the specified function"), 80 cl::value_desc("funcname"), 81 cl::init("")); 82 83// Debugging level for shrink wrapping. 84enum ShrinkWrapDebugLevel { 85 None, BasicInfo, Iterations, Details 86}; 87 88static cl::opt<enum ShrinkWrapDebugLevel> 89ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, 90 cl::desc("Print shrink wrapping debugging information"), 91 cl::values( 92 clEnumVal(None , "disable debug output"), 93 clEnumVal(BasicInfo , "print basic DF sets"), 94 clEnumVal(Iterations, "print SR sets for each iteration"), 95 clEnumVal(Details , "print all DF sets"), 96 clEnumValEnd)); 97 98 99namespace { 100 struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass { 101 static char ID; 102 PEI() : MachineFunctionPass(&ID) {} 103 104 const char *getPassName() const { 105 return "Prolog/Epilog Insertion & Frame Finalization"; 106 } 107 108 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 109 AU.setPreservesCFG(); 110 if (ShrinkWrapping || ShrinkWrapFunc != "") { 111 AU.addRequired<MachineLoopInfo>(); 112 AU.addRequired<MachineDominatorTree>(); 113 } 114 AU.addPreserved<MachineLoopInfo>(); 115 AU.addPreserved<MachineDominatorTree>(); 116 MachineFunctionPass::getAnalysisUsage(AU); 117 } 118 119 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract 120 /// frame indexes with appropriate references. 121 /// 122 bool runOnMachineFunction(MachineFunction &Fn) { 123 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); 124 RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; 125 126 DEBUG(MF = &Fn); 127 128 // Get MachineModuleInfo so that we can track the construction of the 129 // frame. 130 if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>()) 131 Fn.getFrameInfo()->setMachineModuleInfo(MMI); 132 133 // Allow the target machine to make some adjustments to the function 134 // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. 135 TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); 136 137 // Scan the function for modified callee saved registers and insert spill 138 // code for any callee saved registers that are modified. Also calculate 139 // the MaxCallFrameSize and HasCalls variables for the function's frame 140 // information and eliminates call frame pseudo instructions. 141 calculateCalleeSavedRegisters(Fn); 142 143 // Determine placement of CSR spill/restore code: 144 // - with shrink wrapping, place spills and restores to tightly 145 // enclose regions in the Machine CFG of the function where 146 // they are used. Without shrink wrapping 147 // - default (no shrink wrapping), place all spills in the 148 // entry block, all restores in return blocks. 149 placeCSRSpillsAndRestores(Fn); 150 151 // Add the code to save and restore the callee saved registers 152 insertCSRSpillsAndRestores(Fn); 153 154 // Allow the target machine to make final modifications to the function 155 // before the frame layout is finalized. 156 TRI->processFunctionBeforeFrameFinalized(Fn); 157 158 // Calculate actual frame offsets for all abstract stack objects... 159 calculateFrameObjectOffsets(Fn); 160 161 // Add prolog and epilog code to the function. This function is required 162 // to align the stack frame as necessary for any stack variables or 163 // called functions. Because of this, calculateCalleeSavedRegisters 164 // must be called before this function in order to set the HasCalls 165 // and MaxCallFrameSize variables. 166 insertPrologEpilogCode(Fn); 167 168 // Replace all MO_FrameIndex operands with physical register references 169 // and actual offsets. 170 // 171 replaceFrameIndices(Fn); 172 173 delete RS; 174 clearAllSets(); 175 return true; 176 } 177 178 private: 179 RegScavenger *RS; 180 181 // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved 182 // stack frame indexes. 183 unsigned MinCSFrameIndex, MaxCSFrameIndex; 184 185 // Analysis info for spill/restore placement. 186 // "CSR": "callee saved register". 187 188 // CSRegSet contains indices into the Callee Saved Register Info 189 // vector built by calculateCalleeSavedRegisters() and accessed 190 // via MF.getFrameInfo()->getCalleeSavedInfo(). 191 typedef SparseBitVector<> CSRegSet; 192 193 // CSRegBlockMap maps MachineBasicBlocks to sets of callee 194 // saved register indices. 195 typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap; 196 197 // Set and maps for computing CSR spill/restore placement: 198 // used in function (UsedCSRegs) 199 // used in a basic block (CSRUsed) 200 // anticipatable in a basic block (Antic{In,Out}) 201 // available in a basic block (Avail{In,Out}) 202 // to be spilled at the entry to a basic block (CSRSave) 203 // to be restored at the end of a basic block (CSRRestore) 204 CSRegSet UsedCSRegs; 205 CSRegBlockMap CSRUsed; 206 CSRegBlockMap AnticIn, AnticOut; 207 CSRegBlockMap AvailIn, AvailOut; 208 CSRegBlockMap CSRSave; 209 CSRegBlockMap CSRRestore; 210 211 // Entry and return blocks of the current function. 212 MachineBasicBlock* EntryBlock; 213 SmallVector<MachineBasicBlock*, 4> ReturnBlocks; 214 215 // Map of MBBs to top level MachineLoops. 216 DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops; 217 218 // Flag to control shrink wrapping per-function: 219 // may choose to skip shrink wrapping for certain 220 // functions. 221 bool ShrinkWrapThisFunction; 222 223#ifndef NDEBUG 224 // Machine function handle. 225 MachineFunction* MF; 226 227 // Flag indicating that the current function 228 // has at least one "short" path in the machine 229 // CFG from the entry block to an exit block. 230 bool HasFastExitPath; 231#endif 232 233 bool calculateSets(MachineFunction &Fn); 234 bool calcAnticInOut(MachineBasicBlock* MBB); 235 bool calcAvailInOut(MachineBasicBlock* MBB); 236 void calculateAnticAvail(MachineFunction &Fn); 237 bool addUsesForMEMERegion(MachineBasicBlock* MBB, 238 SmallVector<MachineBasicBlock*, 4>& blks); 239 bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks); 240 bool calcSpillPlacements(MachineBasicBlock* MBB, 241 SmallVector<MachineBasicBlock*, 4> &blks, 242 CSRegBlockMap &prevSpills); 243 bool calcRestorePlacements(MachineBasicBlock* MBB, 244 SmallVector<MachineBasicBlock*, 4> &blks, 245 CSRegBlockMap &prevRestores); 246 void placeSpillsAndRestores(MachineFunction &Fn); 247 void placeCSRSpillsAndRestores(MachineFunction &Fn); 248 void calculateCalleeSavedRegisters(MachineFunction &Fn); 249 void insertCSRSpillsAndRestores(MachineFunction &Fn); 250 void calculateFrameObjectOffsets(MachineFunction &Fn); 251 void replaceFrameIndices(MachineFunction &Fn); 252 void insertPrologEpilogCode(MachineFunction &Fn); 253 254 // Initialize DFA sets, called before iterations. 255 void clearAnticAvailSets(); 256 // Clear all sets constructed by shrink wrapping. 257 void clearAllSets(); 258 259 // Initialize all shrink wrapping data. 260 void initShrinkWrappingInfo(); 261 262 // Convienences for dealing with machine loops. 263 MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP) { 264 assert(LP && "Machine loop is NULL."); 265 MachineBasicBlock* PHDR = LP->getLoopPreheader(); 266 MachineLoop* PLP = LP->getParentLoop(); 267 while (PLP) { 268 PHDR = PLP->getLoopPreheader(); 269 PLP = PLP->getParentLoop(); 270 } 271 return PHDR; 272 } 273 274 MachineLoop* getTopLevelLoopParent(MachineLoop *LP) { 275 if (LP == 0) 276 return 0; 277 MachineLoop* PLP = LP->getParentLoop(); 278 while (PLP) { 279 LP = PLP; 280 PLP = PLP->getParentLoop(); 281 } 282 return LP; 283 } 284 285 // Propgate CSRs used in MBB to all MBBs of loop LP. 286 void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP); 287 288 // Convenience for recognizing return blocks. 289 bool isReturnBlock(MachineBasicBlock* MBB) { 290 return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn()); 291 } 292 293#ifndef NDEBUG 294 // Debugging methods. 295 296 // Mark this function as having fast exit paths. 297 void findFastExitPath(); 298 299 // Verify placement of spills/restores. 300 void verifySpillRestorePlacement(); 301 302 std::string getBasicBlockName(const MachineBasicBlock* MBB); 303 std::string stringifyCSRegSet(const CSRegSet& s); 304 void dumpSet(const CSRegSet& s); 305 void dumpUsed(MachineBasicBlock* MBB); 306 void dumpAllUsed(); 307 void dumpSets(MachineBasicBlock* MBB); 308 void dumpSets1(MachineBasicBlock* MBB); 309 void dumpAllSets(); 310 void dumpSRSets(); 311#endif 312 313 }; 314 char PEI::ID = 0; 315} 316 317// Initialize shrink wrapping DFA sets, called before iterations. 318void PEI::clearAnticAvailSets() { 319 AnticIn.clear(); 320 AnticOut.clear(); 321 AvailIn.clear(); 322 AvailOut.clear(); 323} 324 325// Clear all sets constructed by shrink wrapping. 326void PEI::clearAllSets() { 327 ReturnBlocks.clear(); 328 clearAnticAvailSets(); 329 UsedCSRegs.clear(); 330 CSRUsed.clear(); 331 TLLoops.clear(); 332 CSRSave.clear(); 333 CSRRestore.clear(); 334} 335 336// Initialize all shrink wrapping data. 337void PEI::initShrinkWrappingInfo() { 338 clearAllSets(); 339 EntryBlock = 0; 340#ifndef NDEBUG 341 HasFastExitPath = false; 342#endif 343 ShrinkWrapThisFunction = ShrinkWrapping; 344 // DEBUG: enable or disable shrink wrapping for the current function 345 // via --shrink-wrap-func=<funcname>. 346#ifndef NDEBUG 347 if (ShrinkWrapFunc != "") { 348 std::string MFName = MF->getFunction()->getName(); 349 ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); 350 } 351#endif 352} 353 354 355/// createPrologEpilogCodeInserter - This function returns a pass that inserts 356/// prolog and epilog code, and eliminates abstract frame references. 357/// 358FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } 359 360/// placeCSRSpillsAndRestores - determine which MBBs of the function 361/// need save, restore code for callee-saved registers by doing a DF analysis 362/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs 363/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo 364/// is used to ensure that CSR save/restore code is not placed inside loops. 365/// This function computes the maps of MBBs -> CSRs to spill and restore 366/// in CSRSave, CSRRestore. 367/// 368/// If shrink wrapping is not being performed, place all spills in 369/// the entry block, all restores in return blocks. In this case, 370/// CSRSave has a single mapping, CSRRestore has mappings for each 371/// return block. 372/// 373void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { 374 375 initShrinkWrappingInfo(); 376 377 DEBUG(if (ShrinkWrapThisFunction) { 378 DOUT << "Place CSR spills/restores for " 379 << MF->getFunction()->getName() << "\n"; 380 }); 381 382 if (calculateSets(Fn)) 383 placeSpillsAndRestores(Fn); 384} 385 386/// calcAnticInOut - calculate the anticipated in/out reg sets 387/// for the given MBB by looking forward in the MCFG at MBB's 388/// successors. 389/// 390bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { 391 bool changed = false; 392 393 // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) 394 SmallVector<MachineBasicBlock*, 4> successors; 395 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 396 SE = MBB->succ_end(); SI != SE; ++SI) { 397 MachineBasicBlock* SUCC = *SI; 398 if (SUCC != MBB) 399 successors.push_back(SUCC); 400 } 401 402 unsigned i = 0, e = successors.size(); 403 if (i != e) { 404 CSRegSet prevAnticOut = AnticOut[MBB]; 405 MachineBasicBlock* SUCC = successors[i]; 406 407 AnticOut[MBB] = AnticIn[SUCC]; 408 for (++i; i != e; ++i) { 409 SUCC = successors[i]; 410 AnticOut[MBB] &= AnticIn[SUCC]; 411 } 412 if (prevAnticOut != AnticOut[MBB]) 413 changed = true; 414 } 415 416 // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); 417 CSRegSet prevAnticIn = AnticIn[MBB]; 418 AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; 419 if (prevAnticIn |= AnticIn[MBB]) 420 changed = true; 421 return changed; 422} 423 424/// calcAvailInOut - calculate the available in/out reg sets 425/// for the given MBB by looking backward in the MCFG at MBB's 426/// predecessors. 427/// 428bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { 429 bool changed = false; 430 431 // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) 432 SmallVector<MachineBasicBlock*, 4> predecessors; 433 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 434 PE = MBB->pred_end(); PI != PE; ++PI) { 435 MachineBasicBlock* PRED = *PI; 436 if (PRED != MBB) 437 predecessors.push_back(PRED); 438 } 439 440 unsigned i = 0, e = predecessors.size(); 441 if (i != e) { 442 CSRegSet prevAvailIn = AvailIn[MBB]; 443 MachineBasicBlock* PRED = predecessors[i]; 444 445 AvailIn[MBB] = AvailOut[PRED]; 446 for (++i; i != e; ++i) { 447 PRED = predecessors[i]; 448 AvailIn[MBB] &= AvailOut[PRED]; 449 } 450 if (prevAvailIn != AvailIn[MBB]) 451 changed = true; 452 } 453 454 // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); 455 CSRegSet prevAvailOut = AvailOut[MBB]; 456 AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; 457 if (prevAvailOut |= AvailOut[MBB]) 458 changed = true; 459 return changed; 460} 461 462/// calculateAnticAvail - build the sets anticipated and available 463/// registers in the MCFG of the current function iteratively, 464/// doing a combined forward and backward analysis. 465/// 466void PEI::calculateAnticAvail(MachineFunction &Fn) { 467 // Initialize data flow sets. 468 clearAnticAvailSets(); 469 470 // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. 471 bool changed = true; 472 unsigned iterations = 0; 473 while (changed) { 474 changed = false; 475 ++iterations; 476 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 477 MBBI != MBBE; ++MBBI) { 478 MachineBasicBlock* MBB = MBBI; 479 480 // Calculate anticipated in, out regs at MBB from 481 // anticipated at successors of MBB. 482 changed |= calcAnticInOut(MBB); 483 484 // Calculate available in, out regs at MBB from 485 // available at predecessors of MBB. 486 changed |= calcAvailInOut(MBB); 487 } 488 } 489 490 DEBUG(if (ShrinkWrapDebugging >= Details) { 491 DOUT << "-----------------------------------------------------------\n"; 492 DOUT << " Antic/Avail Sets:\n"; 493 DOUT << "-----------------------------------------------------------\n"; 494 DOUT << "iterations = " << iterations << "\n"; 495 DOUT << "-----------------------------------------------------------\n"; 496 DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"; 497 DOUT << "-----------------------------------------------------------\n"; 498 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 499 MBBI != MBBE; ++MBBI) { 500 MachineBasicBlock* MBB = MBBI; 501 dumpSets(MBB); 502 } 503 DOUT << "-----------------------------------------------------------\n"; 504 }); 505} 506 507/// propagateUsesAroundLoop - copy used register info from MBB to all blocks 508/// of the loop given by LP and its parent loops. This prevents spills/restores 509/// from being placed in the bodies of loops. 510/// 511void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { 512 if (! MBB || !LP) 513 return; 514 515 std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); 516 for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { 517 MachineBasicBlock* LBB = loopBlocks[i]; 518 if (LBB == MBB) 519 continue; 520 if (CSRUsed[LBB].contains(CSRUsed[MBB])) 521 continue; 522 CSRUsed[LBB] |= CSRUsed[MBB]; 523 } 524} 525 526/// calculateSets - collect the CSRs used in this function, compute 527/// the DF sets that describe the initial minimal regions in the 528/// Machine CFG around which CSR spills and restores must be placed. 529/// 530/// Additionally, this function decides if shrink wrapping should 531/// be disabled for the current function, checking the following: 532/// 1. the current function has more than 500 MBBs: heuristic limit 533/// on function size to reduce compile time impact of the current 534/// iterative algorithm. 535/// 2. all CSRs are used in the entry block. 536/// 3. all CSRs are used in all immediate successors of the entry block. 537/// 4. all CSRs are used in a subset of blocks, each of which dominates 538/// all return blocks. These blocks, taken as a subgraph of the MCFG, 539/// are equivalent to the entry block since all execution paths pass 540/// through them. 541/// 542bool PEI::calculateSets(MachineFunction &Fn) { 543 // Sets used to compute spill, restore placement sets. 544 const std::vector<CalleeSavedInfo> CSI = 545 Fn.getFrameInfo()->getCalleeSavedInfo(); 546 547 // If no CSRs used, we are done. 548 if (CSI.empty()) { 549 DEBUG(if (ShrinkWrapThisFunction) 550 DOUT << "DISABLED: " << Fn.getFunction()->getName() 551 << ": uses no callee-saved registers\n"); 552 return false; 553 } 554 555 // Save refs to entry and return blocks. 556 EntryBlock = Fn.begin(); 557 for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); 558 MBB != E; ++MBB) 559 if (isReturnBlock(MBB)) 560 ReturnBlocks.push_back(MBB); 561 562 // Determine if this function has fast exit paths. 563 DEBUG(if (ShrinkWrapThisFunction) 564 findFastExitPath()); 565 566 // Limit shrink wrapping via the current iterative bit vector 567 // implementation to functions with <= 500 MBBs. 568 if (Fn.size() > 500) { 569 DEBUG(if (ShrinkWrapThisFunction) 570 DOUT << "DISABLED: " << Fn.getFunction()->getName() 571 << ": too large (" << Fn.size() << " MBBs)\n"); 572 ShrinkWrapThisFunction = false; 573 } 574 575 // Return now if not shrink wrapping. 576 if (! ShrinkWrapThisFunction) 577 return false; 578 579 // Collect set of used CSRs. 580 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { 581 UsedCSRegs.set(inx); 582 } 583 584 // Walk instructions in all MBBs, create CSRUsed[] sets, choose 585 // whether or not to shrink wrap this function. 586 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); 587 MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); 588 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); 589 590 bool allCSRUsesInEntryBlock = true; 591 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 592 MBBI != MBBE; ++MBBI) { 593 MachineBasicBlock* MBB = MBBI; 594 for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { 595 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { 596 unsigned Reg = CSI[inx].getReg(); 597 // If instruction I reads or modifies Reg, add it to UsedCSRegs, 598 // CSRUsed map for the current block. 599 for (unsigned opInx = 0, opEnd = I->getNumOperands(); 600 opInx != opEnd; ++opInx) { 601 const MachineOperand &MO = I->getOperand(opInx); 602 if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) 603 continue; 604 unsigned MOReg = MO.getReg(); 605 if (!MOReg) 606 continue; 607 if (MOReg == Reg || 608 (TargetRegisterInfo::isPhysicalRegister(MOReg) && 609 TargetRegisterInfo::isPhysicalRegister(Reg) && 610 TRI->isSubRegister(Reg, MOReg))) { 611 // CSR Reg is defined/used in block MBB. 612 CSRUsed[MBB].set(inx); 613 // Check for uses in EntryBlock. 614 if (MBB != EntryBlock) 615 allCSRUsesInEntryBlock = false; 616 } 617 } 618 } 619 } 620 621 if (CSRUsed[MBB].empty()) 622 continue; 623 624 // Propagate CSRUsed[MBB] in loops 625 if (MachineLoop* LP = LI.getLoopFor(MBB)) { 626 // Add top level loop to work list. 627 MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); 628 MachineLoop* PLP = getTopLevelLoopParent(LP); 629 630 if (! HDR) { 631 HDR = PLP->getHeader(); 632 assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); 633 MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); 634 HDR = *PI; 635 } 636 TLLoops[HDR] = PLP; 637 638 // Push uses from inside loop to its parent loops, 639 // or to all other MBBs in its loop. 640 if (LP->getLoopDepth() > 1) { 641 for (MachineLoop* PLP = LP->getParentLoop(); PLP; 642 PLP = PLP->getParentLoop()) { 643 propagateUsesAroundLoop(MBB, PLP); 644 } 645 } else { 646 propagateUsesAroundLoop(MBB, LP); 647 } 648 } 649 } 650 651 if (allCSRUsesInEntryBlock) { 652 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() 653 << ": all CSRs used in EntryBlock\n"); 654 ShrinkWrapThisFunction = false; 655 } else { 656 bool allCSRsUsedInEntryFanout = true; 657 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), 658 SE = EntryBlock->succ_end(); SI != SE; ++SI) { 659 MachineBasicBlock* SUCC = *SI; 660 if (CSRUsed[SUCC] != UsedCSRegs) 661 allCSRsUsedInEntryFanout = false; 662 } 663 if (allCSRsUsedInEntryFanout) { 664 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() 665 << ": all CSRs used in imm successors of EntryBlock\n"); 666 ShrinkWrapThisFunction = false; 667 } 668 } 669 670 if (ShrinkWrapThisFunction) { 671 // Check if MBB uses CSRs and dominates all exit nodes. 672 // Such nodes are equiv. to the entry node w.r.t. 673 // CSR uses: every path through the function must 674 // pass through this node. If each CSR is used at least 675 // once by these nodes, shrink wrapping is disabled. 676 CSRegSet CSRUsedInChokePoints; 677 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 678 MBBI != MBBE; ++MBBI) { 679 MachineBasicBlock* MBB = MBBI; 680 if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) 681 continue; 682 bool dominatesExitNodes = true; 683 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) 684 if (! DT.dominates(MBB, ReturnBlocks[ri])) { 685 dominatesExitNodes = false; 686 break; 687 } 688 if (dominatesExitNodes) { 689 CSRUsedInChokePoints |= CSRUsed[MBB]; 690 if (CSRUsedInChokePoints == UsedCSRegs) { 691 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() 692 << ": all CSRs used in choke point(s) at " 693 << getBasicBlockName(MBB) << "\n"); 694 ShrinkWrapThisFunction = false; 695 break; 696 } 697 } 698 } 699 } 700 701 // Return now if we have decided not to apply shrink wrapping 702 // to the current function. 703 if (! ShrinkWrapThisFunction) 704 return false; 705 706 DEBUG({ 707 DOUT << "ENABLED: " << Fn.getFunction()->getName(); 708 if (HasFastExitPath) 709 DOUT << " (fast exit path)"; 710 DOUT << "\n"; 711 if (ShrinkWrapDebugging >= BasicInfo) { 712 DOUT << "------------------------------" 713 << "-----------------------------\n"; 714 DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; 715 if (ShrinkWrapDebugging >= Details) { 716 DOUT << "------------------------------" 717 << "-----------------------------\n"; 718 dumpAllUsed(); 719 } 720 } 721 }); 722 723 // Build initial DF sets to determine minimal regions in the 724 // Machine CFG around which CSRs must be spilled and restored. 725 calculateAnticAvail(Fn); 726 727 return true; 728} 729 730/// addUsesForMEMERegion - add uses of CSRs spilled or restored in 731/// multi-entry, multi-exit (MEME) regions so spill and restore 732/// placement will not break code that enters or leaves a 733/// shrink-wrapped region by inducing spills with no matching 734/// restores or restores with no matching spills. A MEME region 735/// is a subgraph of the MCFG with multiple entry edges, multiple 736/// exit edges, or both. This code propagates use information 737/// through the MCFG until all paths requiring spills and restores 738/// _outside_ the computed minimal placement regions have been covered. 739/// 740bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, 741 SmallVector<MachineBasicBlock*, 4>& blks) { 742 if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { 743 bool processThisBlock = false; 744 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 745 SE = MBB->succ_end(); SI != SE; ++SI) { 746 MachineBasicBlock* SUCC = *SI; 747 if (SUCC->pred_size() > 1) { 748 processThisBlock = true; 749 break; 750 } 751 } 752 if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { 753 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 754 PE = MBB->pred_end(); PI != PE; ++PI) { 755 MachineBasicBlock* PRED = *PI; 756 if (PRED->succ_size() > 1) { 757 processThisBlock = true; 758 break; 759 } 760 } 761 } 762 if (! processThisBlock) 763 return false; 764 } 765 766 CSRegSet prop; 767 if (!CSRSave[MBB].empty()) 768 prop = CSRSave[MBB]; 769 else if (!CSRRestore[MBB].empty()) 770 prop = CSRRestore[MBB]; 771 else 772 prop = CSRUsed[MBB]; 773 if (prop.empty()) 774 return false; 775 776 // Propagate selected bits to successors, predecessors of MBB. 777 bool addedUses = false; 778 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 779 SE = MBB->succ_end(); SI != SE; ++SI) { 780 MachineBasicBlock* SUCC = *SI; 781 // Self-loop 782 if (SUCC == MBB) 783 continue; 784 if (! CSRUsed[SUCC].contains(prop)) { 785 CSRUsed[SUCC] |= prop; 786 addedUses = true; 787 blks.push_back(SUCC); 788 DEBUG(if (ShrinkWrapDebugging >= Iterations) 789 DOUT << getBasicBlockName(MBB) 790 << "(" << stringifyCSRegSet(prop) << ")->" 791 << "successor " << getBasicBlockName(SUCC) << "\n"); 792 } 793 } 794 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 795 PE = MBB->pred_end(); PI != PE; ++PI) { 796 MachineBasicBlock* PRED = *PI; 797 // Self-loop 798 if (PRED == MBB) 799 continue; 800 if (! CSRUsed[PRED].contains(prop)) { 801 CSRUsed[PRED] |= prop; 802 addedUses = true; 803 blks.push_back(PRED); 804 DEBUG(if (ShrinkWrapDebugging >= Iterations) 805 DOUT << getBasicBlockName(MBB) 806 << "(" << stringifyCSRegSet(prop) << ")->" 807 << "predecessor " << getBasicBlockName(PRED) << "\n"); 808 } 809 } 810 return addedUses; 811} 812 813/// addUsesForTopLevelLoops - add uses for CSRs used inside top 814/// level loops to the exit blocks of those loops. 815/// 816bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { 817 bool addedUses = false; 818 819 // Place restores for top level loops where needed. 820 for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator 821 I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { 822 MachineBasicBlock* MBB = I->first; 823 MachineLoop* LP = I->second; 824 MachineBasicBlock* HDR = LP->getHeader(); 825 SmallVector<MachineBasicBlock*, 4> exitBlocks; 826 CSRegSet loopSpills; 827 828 loopSpills = CSRSave[MBB]; 829 if (CSRSave[MBB].empty()) { 830 loopSpills = CSRUsed[HDR]; 831 assert(!loopSpills.empty() && "No CSRs used in loop?"); 832 } else if (CSRRestore[MBB].contains(CSRSave[MBB])) 833 continue; 834 835 LP->getExitBlocks(exitBlocks); 836 assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); 837 for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { 838 MachineBasicBlock* EXB = exitBlocks[i]; 839 if (! CSRUsed[EXB].contains(loopSpills)) { 840 CSRUsed[EXB] |= loopSpills; 841 addedUses = true; 842 DEBUG(if (ShrinkWrapDebugging >= Iterations) 843 DOUT << "LOOP " << getBasicBlockName(MBB) 844 << "(" << stringifyCSRegSet(loopSpills) << ")->" 845 << getBasicBlockName(EXB) << "\n"); 846 if (EXB->succ_size() > 1 || EXB->pred_size() > 1) 847 blks.push_back(EXB); 848 } 849 } 850 } 851 return addedUses; 852} 853 854/// calcSpillPlacements - determine which CSRs should be spilled 855/// in MBB using AnticIn sets of MBB's predecessors, keeping track 856/// of changes to spilled reg sets. Add MBB to the set of blocks 857/// that need to be processed for propagating use info to cover 858/// multi-entry/exit regions. 859/// 860bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, 861 SmallVector<MachineBasicBlock*, 4> &blks, 862 CSRegBlockMap &prevSpills) { 863 bool placedSpills = false; 864 // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) 865 CSRegSet anticInPreds; 866 SmallVector<MachineBasicBlock*, 4> predecessors; 867 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 868 PE = MBB->pred_end(); PI != PE; ++PI) { 869 MachineBasicBlock* PRED = *PI; 870 if (PRED != MBB) 871 predecessors.push_back(PRED); 872 } 873 unsigned i = 0, e = predecessors.size(); 874 if (i != e) { 875 MachineBasicBlock* PRED = predecessors[i]; 876 anticInPreds = UsedCSRegs - AnticIn[PRED]; 877 for (++i; i != e; ++i) { 878 PRED = predecessors[i]; 879 anticInPreds &= (UsedCSRegs - AnticIn[PRED]); 880 } 881 } else { 882 // Handle uses in entry blocks (which have no predecessors). 883 // This is necessary because the DFA formulation assumes the 884 // entry and (multiple) exit nodes cannot have CSR uses, which 885 // is not the case in the real world. 886 anticInPreds = UsedCSRegs; 887 } 888 // Compute spills required at MBB: 889 CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; 890 891 if (! CSRSave[MBB].empty()) { 892 if (MBB == EntryBlock) { 893 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) 894 CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; 895 } else { 896 // Reset all regs spilled in MBB that are also spilled in EntryBlock. 897 if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { 898 CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; 899 } 900 } 901 } 902 placedSpills = (CSRSave[MBB] != prevSpills[MBB]); 903 prevSpills[MBB] = CSRSave[MBB]; 904 // Remember this block for adding restores to successor 905 // blocks for multi-entry region. 906 if (placedSpills) 907 blks.push_back(MBB); 908 909 DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) 910 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " 911 << stringifyCSRegSet(CSRSave[MBB]) << "\n"); 912 913 return placedSpills; 914} 915 916/// calcRestorePlacements - determine which CSRs should be restored 917/// in MBB using AvailOut sets of MBB's succcessors, keeping track 918/// of changes to restored reg sets. Add MBB to the set of blocks 919/// that need to be processed for propagating use info to cover 920/// multi-entry/exit regions. 921/// 922bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, 923 SmallVector<MachineBasicBlock*, 4> &blks, 924 CSRegBlockMap &prevRestores) { 925 bool placedRestores = false; 926 // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) 927 CSRegSet availOutSucc; 928 SmallVector<MachineBasicBlock*, 4> successors; 929 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 930 SE = MBB->succ_end(); SI != SE; ++SI) { 931 MachineBasicBlock* SUCC = *SI; 932 if (SUCC != MBB) 933 successors.push_back(SUCC); 934 } 935 unsigned i = 0, e = successors.size(); 936 if (i != e) { 937 MachineBasicBlock* SUCC = successors[i]; 938 availOutSucc = UsedCSRegs - AvailOut[SUCC]; 939 for (++i; i != e; ++i) { 940 SUCC = successors[i]; 941 availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); 942 } 943 } else { 944 if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { 945 // Handle uses in return blocks (which have no successors). 946 // This is necessary because the DFA formulation assumes the 947 // entry and (multiple) exit nodes cannot have CSR uses, which 948 // is not the case in the real world. 949 availOutSucc = UsedCSRegs; 950 } 951 } 952 // Compute restores required at MBB: 953 CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; 954 955 // Postprocess restore placements at MBB. 956 // Remove the CSRs that are restored in the return blocks. 957 // Lest this be confusing, note that: 958 // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. 959 if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { 960 if (! CSRSave[EntryBlock].empty()) 961 CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; 962 } 963 placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); 964 prevRestores[MBB] = CSRRestore[MBB]; 965 // Remember this block for adding saves to predecessor 966 // blocks for multi-entry region. 967 if (placedRestores) 968 blks.push_back(MBB); 969 970 DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) 971 DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " 972 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); 973 974 return placedRestores; 975} 976 977/// placeSpillsAndRestores - place spills and restores of CSRs 978/// used in MBBs in minimal regions that contain the uses. 979/// 980void PEI::placeSpillsAndRestores(MachineFunction &Fn) { 981 CSRegBlockMap prevCSRSave; 982 CSRegBlockMap prevCSRRestore; 983 SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks; 984 bool changed = true; 985 unsigned iterations = 0; 986 987 // Iterate computation of spill and restore placements in the MCFG until: 988 // 1. CSR use info has been fully propagated around the MCFG, and 989 // 2. computation of CSRSave[], CSRRestore[] reach fixed points. 990 while (changed) { 991 changed = false; 992 ++iterations; 993 994 DEBUG(if (ShrinkWrapDebugging >= Iterations) 995 DOUT << "iter " << iterations 996 << " --------------------------------------------------\n"); 997 998 // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, 999 // which determines the placements of spills and restores. 1000 // Keep track of changes to spills, restores in each iteration to 1001 // minimize the total iterations. 1002 bool SRChanged = false; 1003 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 1004 MBBI != MBBE; ++MBBI) { 1005 MachineBasicBlock* MBB = MBBI; 1006 1007 // Place spills for CSRs in MBB. 1008 SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); 1009 1010 // Place restores for CSRs in MBB. 1011 SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); 1012 } 1013 1014 // Add uses of CSRs used inside loops where needed. 1015 changed |= addUsesForTopLevelLoops(cvBlocks); 1016 1017 // Add uses for CSRs spilled or restored at branch, join points. 1018 if (changed || SRChanged) { 1019 while (! cvBlocks.empty()) { 1020 MachineBasicBlock* MBB = cvBlocks.pop_back_val(); 1021 changed |= addUsesForMEMERegion(MBB, ncvBlocks); 1022 } 1023 if (! ncvBlocks.empty()) { 1024 cvBlocks = ncvBlocks; 1025 ncvBlocks.clear(); 1026 } 1027 } 1028 1029 if (changed) { 1030 calculateAnticAvail(Fn); 1031 CSRSave.clear(); 1032 CSRRestore.clear(); 1033 } 1034 } 1035 1036 // Check for effectiveness: 1037 // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} 1038 // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] 1039 // Gives a measure of how many CSR spills have been moved from EntryBlock 1040 // to minimal regions enclosing their uses. 1041 CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); 1042 unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); 1043 numSRReduced += numSRReducedThisFunc; 1044 DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { 1045 DOUT << "-----------------------------------------------------------\n"; 1046 DOUT << "total iterations = " << iterations << " ( " 1047 << Fn.getFunction()->getName() 1048 << " " << numSRReducedThisFunc 1049 << " " << Fn.size() 1050 << " )\n"; 1051 DOUT << "-----------------------------------------------------------\n"; 1052 dumpSRSets(); 1053 DOUT << "-----------------------------------------------------------\n"; 1054 if (numSRReducedThisFunc) 1055 verifySpillRestorePlacement(); 1056 }); 1057} 1058 1059/// calculateCalleeSavedRegisters - Scan the function for modified callee saved 1060/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for 1061/// the function's frame information and eliminates call frame pseudo 1062/// instructions. 1063/// 1064void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { 1065 const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); 1066 const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); 1067 1068 // Get the callee saved register list... 1069 const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); 1070 1071 // Get the function call frame set-up and tear-down instruction opcode 1072 int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); 1073 int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); 1074 1075 // These are used to keep track the callee-save area. Initialize them. 1076 MinCSFrameIndex = INT_MAX; 1077 MaxCSFrameIndex = 0; 1078 1079 // Early exit for targets which have no callee saved registers and no call 1080 // frame setup/destroy pseudo instructions. 1081 if ((CSRegs == 0 || CSRegs[0] == 0) && 1082 FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) 1083 return; 1084 1085 unsigned MaxCallFrameSize = 0; 1086 bool HasCalls = false; 1087 1088 std::vector<MachineBasicBlock::iterator> FrameSDOps; 1089 for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) 1090 for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) 1091 if (I->getOpcode() == FrameSetupOpcode || 1092 I->getOpcode() == FrameDestroyOpcode) { 1093 assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo" 1094 " instructions should have a single immediate argument!"); 1095 unsigned Size = I->getOperand(0).getImm(); 1096 if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; 1097 HasCalls = true; 1098 FrameSDOps.push_back(I); 1099 } 1100 1101 MachineFrameInfo *FFI = Fn.getFrameInfo(); 1102 FFI->setHasCalls(HasCalls); 1103 FFI->setMaxCallFrameSize(MaxCallFrameSize); 1104 1105 for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) { 1106 MachineBasicBlock::iterator I = FrameSDOps[i]; 1107 // If call frames are not being included as part of the stack frame, 1108 // and there is no dynamic allocation (therefore referencing frame slots 1109 // off sp), leave the pseudo ops alone. We'll eliminate them later. 1110 if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn)) 1111 RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); 1112 } 1113 1114 // Now figure out which *callee saved* registers are modified by the current 1115 // function, thus needing to be saved and restored in the prolog/epilog. 1116 // 1117 const TargetRegisterClass* const *CSRegClasses = 1118 RegInfo->getCalleeSavedRegClasses(&Fn); 1119 std::vector<CalleeSavedInfo> CSI; 1120 for (unsigned i = 0; CSRegs[i]; ++i) { 1121 unsigned Reg = CSRegs[i]; 1122 if (Fn.getRegInfo().isPhysRegUsed(Reg)) { 1123 // If the reg is modified, save it! 1124 CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); 1125 } else { 1126 for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); 1127 *AliasSet; ++AliasSet) { // Check alias registers too. 1128 if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { 1129 CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); 1130 break; 1131 } 1132 } 1133 } 1134 } 1135 1136 if (CSI.empty()) 1137 return; // Early exit if no callee saved registers are modified! 1138 1139 unsigned NumFixedSpillSlots; 1140 const std::pair<unsigned,int> *FixedSpillSlots = 1141 TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); 1142 1143 // Now that we know which registers need to be saved and restored, allocate 1144 // stack slots for them. 1145 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1146 unsigned Reg = CSI[i].getReg(); 1147 const TargetRegisterClass *RC = CSI[i].getRegClass(); 1148 1149 // Check to see if this physreg must be spilled to a particular stack slot 1150 // on this target. 1151 const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots; 1152 while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && 1153 FixedSlot->first != Reg) 1154 ++FixedSlot; 1155 1156 int FrameIdx; 1157 if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) { 1158 // Nope, just spill it anywhere convenient. 1159 unsigned Align = RC->getAlignment(); 1160 unsigned StackAlign = TFI->getStackAlignment(); 1161 // We may not be able to sastify the desired alignment specification of 1162 // the TargetRegisterClass if the stack alignment is smaller. 1163 // Use the min. 1164 Align = std::min(Align, StackAlign); 1165 FrameIdx = FFI->CreateStackObject(RC->getSize(), Align); 1166 if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; 1167 if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; 1168 } else { 1169 // Spill it to the stack where we must. 1170 FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second); 1171 } 1172 CSI[i].setFrameIdx(FrameIdx); 1173 } 1174 1175 FFI->setCalleeSavedInfo(CSI); 1176} 1177 1178/// insertCSRSpillsAndRestores - Insert spill and restore code for 1179/// callee saved registers used in the function, handling shrink wrapping. 1180/// 1181void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { 1182 // Get callee saved register information. 1183 MachineFrameInfo *FFI = Fn.getFrameInfo(); 1184 const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); 1185 1186 // Early exit if no callee saved registers are modified! 1187 if (CSI.empty()) 1188 return; 1189 1190 const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); 1191 MachineBasicBlock::iterator I; 1192 1193 DEBUG(if (ShrinkWrapThisFunction && ShrinkWrapDebugging >= Details) 1194 DOUT << "Inserting CSR spills/restores in function " 1195 << Fn.getFunction()->getName() << "\n"); 1196 1197 if (! ShrinkWrapThisFunction) { 1198 // Spill using target interface. 1199 I = EntryBlock->begin(); 1200 if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) { 1201 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1202 // Add the callee-saved register as live-in. 1203 // It's killed at the spill. 1204 EntryBlock->addLiveIn(CSI[i].getReg()); 1205 1206 // Insert the spill to the stack frame. 1207 TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true, 1208 CSI[i].getFrameIdx(), CSI[i].getRegClass()); 1209 } 1210 } 1211 1212 // Restore using target interface. 1213 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { 1214 MachineBasicBlock* MBB = ReturnBlocks[ri]; 1215 I = MBB->end(); --I; 1216 1217 // Skip over all terminator instructions, which are part of the return 1218 // sequence. 1219 MachineBasicBlock::iterator I2 = I; 1220 while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) 1221 I = I2; 1222 1223 bool AtStart = I == MBB->begin(); 1224 MachineBasicBlock::iterator BeforeI = I; 1225 if (!AtStart) 1226 --BeforeI; 1227 1228 // Restore all registers immediately before the return and any 1229 // terminators that preceed it. 1230 if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) { 1231 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1232 TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(), 1233 CSI[i].getFrameIdx(), 1234 CSI[i].getRegClass()); 1235 assert(I != MBB->begin() && 1236 "loadRegFromStackSlot didn't insert any code!"); 1237 // Insert in reverse order. loadRegFromStackSlot can insert 1238 // multiple instructions. 1239 if (AtStart) 1240 I = MBB->begin(); 1241 else { 1242 I = BeforeI; 1243 ++I; 1244 } 1245 } 1246 } 1247 } 1248 return; 1249 } 1250 1251 // Insert spills. 1252 std::vector<CalleeSavedInfo> blockCSI; 1253 for (CSRegBlockMap::iterator BI = CSRSave.begin(), 1254 BE = CSRSave.end(); BI != BE; ++BI) { 1255 MachineBasicBlock* MBB = BI->first; 1256 CSRegSet save = BI->second; 1257 1258 if (save.empty()) 1259 continue; 1260 1261 DEBUG(if (ShrinkWrapDebugging >= Details) 1262 DOUT << "Spilling " << stringifyCSRegSet(save) 1263 << " in " << getBasicBlockName(MBB) << "\n"); 1264 1265 blockCSI.clear(); 1266 for (CSRegSet::iterator RI = save.begin(), 1267 RE = save.end(); RI != RE; ++RI) { 1268 blockCSI.push_back(CSI[*RI]); 1269 } 1270 assert(blockCSI.size() > 0 && 1271 "Could not collect callee saved register info"); 1272 1273 I = MBB->begin(); 1274 1275 // When shrink wrapping, use stack slot stores/loads. 1276 for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { 1277 // Add the callee-saved register as live-in. 1278 // It's killed at the spill. 1279 MBB->addLiveIn(blockCSI[i].getReg()); 1280 1281 // Insert the spill to the stack frame. 1282 TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(), 1283 true, 1284 blockCSI[i].getFrameIdx(), 1285 blockCSI[i].getRegClass()); 1286 } 1287 } 1288 1289 DEBUG(if (ShrinkWrapDebugging >= Details) 1290 DOUT << "------------------------------" 1291 << "-----------------------------\n"); 1292 1293 for (CSRegBlockMap::iterator BI = CSRRestore.begin(), 1294 BE = CSRRestore.end(); BI != BE; ++BI) { 1295 MachineBasicBlock* MBB = BI->first; 1296 CSRegSet restore = BI->second; 1297 1298 if (restore.empty()) 1299 continue; 1300 1301 DEBUG(if (ShrinkWrapDebugging >= Details) 1302 DOUT << "Restoring " << stringifyCSRegSet(restore) 1303 << " in " << getBasicBlockName(MBB) << "\n"); 1304 1305 blockCSI.clear(); 1306 for (CSRegSet::iterator RI = restore.begin(), 1307 RE = restore.end(); RI != RE; ++RI) { 1308 blockCSI.push_back(CSI[*RI]); 1309 } 1310 assert(blockCSI.size() > 0 && 1311 "Could not find callee saved register info"); 1312 1313 // If MBB is empty and needs restores, insert at the _beginning_. 1314 if (MBB->empty()) { 1315 I = MBB->begin(); 1316 } else { 1317 I = MBB->end(); 1318 --I; 1319 1320 // Skip over all terminator instructions, which are part of the 1321 // return sequence. 1322 if (! I->getDesc().isTerminator()) { 1323 ++I; 1324 } else { 1325 MachineBasicBlock::iterator I2 = I; 1326 while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) 1327 I = I2; 1328 } 1329 } 1330 1331 bool AtStart = I == MBB->begin(); 1332 MachineBasicBlock::iterator BeforeI = I; 1333 if (!AtStart) 1334 --BeforeI; 1335 1336 // Restore all registers immediately before the return and any 1337 // terminators that preceed it. 1338 for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { 1339 TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(), 1340 blockCSI[i].getFrameIdx(), 1341 blockCSI[i].getRegClass()); 1342 assert(I != MBB->begin() && 1343 "loadRegFromStackSlot didn't insert any code!"); 1344 // Insert in reverse order. loadRegFromStackSlot can insert 1345 // multiple instructions. 1346 if (AtStart) 1347 I = MBB->begin(); 1348 else { 1349 I = BeforeI; 1350 ++I; 1351 } 1352 } 1353 } 1354 1355 DEBUG(if (ShrinkWrapDebugging >= Details) 1356 DOUT << "------------------------------" 1357 << "-----------------------------\n"); 1358} 1359 1360/// AdjustStackOffset - Helper function used to adjust the stack frame offset. 1361static inline void 1362AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, 1363 bool StackGrowsDown, int64_t &Offset, 1364 unsigned &MaxAlign) { 1365 // If stack grows down, we need to add size of find the lowest address of the 1366 // object. 1367 if (StackGrowsDown) 1368 Offset += FFI->getObjectSize(FrameIdx); 1369 1370 unsigned Align = FFI->getObjectAlignment(FrameIdx); 1371 1372 // If the alignment of this object is greater than that of the stack, then 1373 // increase the stack alignment to match. 1374 MaxAlign = std::max(MaxAlign, Align); 1375 1376 // Adjust to alignment boundary. 1377 Offset = (Offset + Align - 1) / Align * Align; 1378 1379 if (StackGrowsDown) { 1380 FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset 1381 } else { 1382 FFI->setObjectOffset(FrameIdx, Offset); 1383 Offset += FFI->getObjectSize(FrameIdx); 1384 } 1385} 1386 1387/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the 1388/// abstract stack objects. 1389/// 1390void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { 1391 const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); 1392 1393 bool StackGrowsDown = 1394 TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; 1395 1396 // Loop over all of the stack objects, assigning sequential addresses... 1397 MachineFrameInfo *FFI = Fn.getFrameInfo(); 1398 1399 unsigned MaxAlign = FFI->getMaxAlignment(); 1400 1401 // Start at the beginning of the local area. 1402 // The Offset is the distance from the stack top in the direction 1403 // of stack growth -- so it's always nonnegative. 1404 int64_t Offset = TFI.getOffsetOfLocalArea(); 1405 if (StackGrowsDown) 1406 Offset = -Offset; 1407 assert(Offset >= 0 1408 && "Local area offset should be in direction of stack growth"); 1409 1410 // If there are fixed sized objects that are preallocated in the local area, 1411 // non-fixed objects can't be allocated right at the start of local area. 1412 // We currently don't support filling in holes in between fixed sized 1413 // objects, so we adjust 'Offset' to point to the end of last fixed sized 1414 // preallocated object. 1415 for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { 1416 int64_t FixedOff; 1417 if (StackGrowsDown) { 1418 // The maximum distance from the stack pointer is at lower address of 1419 // the object -- which is given by offset. For down growing stack 1420 // the offset is negative, so we negate the offset to get the distance. 1421 FixedOff = -FFI->getObjectOffset(i); 1422 } else { 1423 // The maximum distance from the start pointer is at the upper 1424 // address of the object. 1425 FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i); 1426 } 1427 if (FixedOff > Offset) Offset = FixedOff; 1428 } 1429 1430 // First assign frame offsets to stack objects that are used to spill 1431 // callee saved registers. 1432 if (StackGrowsDown) { 1433 for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { 1434 // If stack grows down, we need to add size of find the lowest 1435 // address of the object. 1436 Offset += FFI->getObjectSize(i); 1437 1438 unsigned Align = FFI->getObjectAlignment(i); 1439 // If the alignment of this object is greater than that of the stack, 1440 // then increase the stack alignment to match. 1441 MaxAlign = std::max(MaxAlign, Align); 1442 // Adjust to alignment boundary 1443 Offset = (Offset+Align-1)/Align*Align; 1444 1445 FFI->setObjectOffset(i, -Offset); // Set the computed offset 1446 } 1447 } else { 1448 int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; 1449 for (int i = MaxCSFI; i >= MinCSFI ; --i) { 1450 unsigned Align = FFI->getObjectAlignment(i); 1451 // If the alignment of this object is greater than that of the stack, 1452 // then increase the stack alignment to match. 1453 MaxAlign = std::max(MaxAlign, Align); 1454 // Adjust to alignment boundary 1455 Offset = (Offset+Align-1)/Align*Align; 1456 1457 FFI->setObjectOffset(i, Offset); 1458 Offset += FFI->getObjectSize(i); 1459 } 1460 } 1461 1462 // Make sure the special register scavenging spill slot is closest to the 1463 // frame pointer if a frame pointer is required. 1464 const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); 1465 if (RS && RegInfo->hasFP(Fn)) { 1466 int SFI = RS->getScavengingFrameIndex(); 1467 if (SFI >= 0) 1468 AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); 1469 } 1470 1471 // Make sure that the stack protector comes before the local variables on the 1472 // stack. 1473 if (FFI->getStackProtectorIndex() >= 0) 1474 AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown, 1475 Offset, MaxAlign); 1476 1477 // Then assign frame offsets to stack objects that are not used to spill 1478 // callee saved registers. 1479 for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { 1480 if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) 1481 continue; 1482 if (RS && (int)i == RS->getScavengingFrameIndex()) 1483 continue; 1484 if (FFI->isDeadObjectIndex(i)) 1485 continue; 1486 if (FFI->getStackProtectorIndex() == (int)i) 1487 continue; 1488 1489 AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign); 1490 } 1491 1492 // Make sure the special register scavenging spill slot is closest to the 1493 // stack pointer. 1494 if (RS && !RegInfo->hasFP(Fn)) { 1495 int SFI = RS->getScavengingFrameIndex(); 1496 if (SFI >= 0) 1497 AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); 1498 } 1499 1500 // Round up the size to a multiple of the alignment, but only if there are 1501 // calls or alloca's in the function. This ensures that any calls to 1502 // subroutines have their stack frames suitable aligned. 1503 // Also do this if we need runtime alignment of the stack. In this case 1504 // offsets will be relative to SP not FP; round up the stack size so this 1505 // works. 1506 if (!RegInfo->targetHandlesStackFrameRounding() && 1507 (FFI->hasCalls() || FFI->hasVarSizedObjects() || 1508 (RegInfo->needsStackRealignment(Fn) && 1509 FFI->getObjectIndexEnd() != 0))) { 1510 // If we have reserved argument space for call sites in the function 1511 // immediately on entry to the current function, count it as part of the 1512 // overall stack size. 1513 if (RegInfo->hasReservedCallFrame(Fn)) 1514 Offset += FFI->getMaxCallFrameSize(); 1515 1516 unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1; 1517 Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); 1518 } 1519 1520 // Update frame info to pretend that this is part of the stack... 1521 FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea()); 1522 1523 // Remember the required stack alignment in case targets need it to perform 1524 // dynamic stack alignment. 1525 FFI->setMaxAlignment(MaxAlign); 1526} 1527 1528 1529/// insertPrologEpilogCode - Scan the function for modified callee saved 1530/// registers, insert spill code for these callee saved registers, then add 1531/// prolog and epilog code to the function. 1532/// 1533void PEI::insertPrologEpilogCode(MachineFunction &Fn) { 1534 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); 1535 1536 // Add prologue to the function... 1537 TRI->emitPrologue(Fn); 1538 1539 // Add epilogue to restore the callee-save registers in each exiting block 1540 for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { 1541 // If last instruction is a return instruction, add an epilogue 1542 if (!I->empty() && I->back().getDesc().isReturn()) 1543 TRI->emitEpilogue(Fn, *I); 1544 } 1545} 1546 1547 1548/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical 1549/// register references and actual offsets. 1550/// 1551void PEI::replaceFrameIndices(MachineFunction &Fn) { 1552 if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? 1553 1554 const TargetMachine &TM = Fn.getTarget(); 1555 assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); 1556 const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); 1557 const TargetFrameInfo *TFI = TM.getFrameInfo(); 1558 bool StackGrowsDown = 1559 TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; 1560 int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); 1561 int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); 1562 1563 for (MachineFunction::iterator BB = Fn.begin(), 1564 E = Fn.end(); BB != E; ++BB) { 1565 int SPAdj = 0; // SP offset due to call frame setup / destroy. 1566 if (RS) RS->enterBasicBlock(BB); 1567 1568 for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { 1569 if (I->getOpcode() == TargetInstrInfo::DECLARE) { 1570 // Ignore it. 1571 ++I; 1572 continue; 1573 } 1574 1575 if (I->getOpcode() == FrameSetupOpcode || 1576 I->getOpcode() == FrameDestroyOpcode) { 1577 // Remember how much SP has been adjusted to create the call 1578 // frame. 1579 int Size = I->getOperand(0).getImm(); 1580 1581 if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || 1582 (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) 1583 Size = -Size; 1584 1585 SPAdj += Size; 1586 1587 MachineBasicBlock::iterator PrevI = BB->end(); 1588 if (I != BB->begin()) PrevI = prior(I); 1589 TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); 1590 1591 // Visit the instructions created by eliminateCallFramePseudoInstr(). 1592 if (PrevI == BB->end()) 1593 I = BB->begin(); // The replaced instr was the first in the block. 1594 else 1595 I = next(PrevI); 1596 continue; 1597 } 1598 1599 MachineInstr *MI = I; 1600 bool DoIncr = true; 1601 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) 1602 if (MI->getOperand(i).isFI()) { 1603 // Some instructions (e.g. inline asm instructions) can have 1604 // multiple frame indices and/or cause eliminateFrameIndex 1605 // to insert more than one instruction. We need the register 1606 // scavenger to go through all of these instructions so that 1607 // it can update its register information. We keep the 1608 // iterator at the point before insertion so that we can 1609 // revisit them in full. 1610 bool AtBeginning = (I == BB->begin()); 1611 if (!AtBeginning) --I; 1612 1613 // If this instruction has a FrameIndex operand, we need to 1614 // use that target machine register info object to eliminate 1615 // it. 1616 1617 TRI.eliminateFrameIndex(MI, SPAdj, RS); 1618 1619 // Reset the iterator if we were at the beginning of the BB. 1620 if (AtBeginning) { 1621 I = BB->begin(); 1622 DoIncr = false; 1623 } 1624 1625 MI = 0; 1626 break; 1627 } 1628 1629 if (DoIncr && I != BB->end()) ++I; 1630 1631 // Update register states. 1632 if (RS && MI) RS->forward(MI); 1633 } 1634 1635 assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); 1636 } 1637} 1638 1639// Debugging methods for shrink wrapping. 1640#ifndef NDEBUG 1641/// findFastExitPath - debugging method used to detect functions 1642/// with at least one path from the entry block to a return block 1643/// directly or which has a very small number of edges. 1644/// 1645void PEI::findFastExitPath() { 1646 if (! EntryBlock) 1647 return; 1648 // Fina a path from EntryBlock to any return block that does not branch: 1649 // Entry 1650 // | ... 1651 // v | 1652 // B1<-----+ 1653 // | 1654 // v 1655 // Return 1656 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), 1657 SE = EntryBlock->succ_end(); SI != SE; ++SI) { 1658 MachineBasicBlock* SUCC = *SI; 1659 1660 // Assume positive, disprove existence of fast path. 1661#ifndef NDEBUG 1662 HasFastExitPath = true; 1663#endif 1664 1665 // Check the immediate successors. 1666 if (isReturnBlock(SUCC)) { 1667 if (ShrinkWrapDebugging >= BasicInfo) 1668 DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) 1669 << "->" << getBasicBlockName(SUCC) << "\n"; 1670 break; 1671 } 1672 // Traverse df from SUCC, look for a branch block. 1673 std::string exitPath = getBasicBlockName(SUCC); 1674 for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC), 1675 BE = df_end(SUCC); BI != BE; ++BI) { 1676 MachineBasicBlock* SBB = *BI; 1677 // Reject paths with branch nodes. 1678 if (SBB->succ_size() > 1) { 1679#ifndef NDEBUG 1680 HasFastExitPath = false; 1681#endif 1682 break; 1683 } 1684 exitPath += "->" + getBasicBlockName(SBB); 1685 } 1686#ifndef NDEBUG 1687 if (HasFastExitPath) { 1688 if (ShrinkWrapDebugging >= BasicInfo) 1689 DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) 1690 << "->" << exitPath << "\n"; 1691 break; 1692 } 1693#endif 1694 } 1695} 1696 1697/// verifySpillRestorePlacement - check the current spill/restore 1698/// sets for safety. Attempt to find spills without restores or 1699/// restores without spills. 1700/// Spills: walk df from each MBB in spill set ensuring that 1701/// all CSRs spilled at MMBB are restored on all paths 1702/// from MBB to all exit blocks. 1703/// Restores: walk idf from each MBB in restore set ensuring that 1704/// all CSRs restored at MBB are spilled on all paths 1705/// reaching MBB. 1706/// 1707void PEI::verifySpillRestorePlacement() { 1708 unsigned numReturnBlocks = 0; 1709 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 1710 MBBI != MBBE; ++MBBI) { 1711 MachineBasicBlock* MBB = MBBI; 1712 if (isReturnBlock(MBB) || MBB->succ_size() == 0) 1713 ++numReturnBlocks; 1714 } 1715 for (CSRegBlockMap::iterator BI = CSRSave.begin(), 1716 BE = CSRSave.end(); BI != BE; ++BI) { 1717 MachineBasicBlock* MBB = BI->first; 1718 CSRegSet spilled = BI->second; 1719 CSRegSet restored; 1720 1721 if (spilled.empty()) 1722 continue; 1723 1724 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " 1725 << stringifyCSRegSet(spilled) 1726 << " RESTORE[" << getBasicBlockName(MBB) << "] = " 1727 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; 1728 1729 if (CSRRestore[MBB].intersects(spilled)) { 1730 restored |= (CSRRestore[MBB] & spilled); 1731 } 1732 1733 // Walk depth first from MBB to find restores of all CSRs spilled at MBB: 1734 // we must find restores for all spills w/no intervening spills on all 1735 // paths from MBB to all return blocks. 1736 for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB), 1737 BE = df_end(MBB); BI != BE; ++BI) { 1738 MachineBasicBlock* SBB = *BI; 1739 if (SBB == MBB) 1740 continue; 1741 // Stop when we encounter spills of any CSRs spilled at MBB that 1742 // have not yet been seen to be restored. 1743 if (CSRSave[SBB].intersects(spilled) && 1744 !restored.contains(CSRSave[SBB] & spilled)) 1745 break; 1746 // Collect the CSRs spilled at MBB that are restored 1747 // at this DF successor of MBB. 1748 if (CSRRestore[SBB].intersects(spilled)) 1749 restored |= (CSRRestore[SBB] & spilled); 1750 // If we are at a retun block, check that the restores 1751 // we have seen so far exhaust the spills at MBB, then 1752 // reset the restores. 1753 if (isReturnBlock(SBB) || SBB->succ_size() == 0) { 1754 if (restored != spilled) { 1755 CSRegSet notRestored = (spilled - restored); 1756 DOUT << MF->getFunction()->getName() << ": " 1757 << stringifyCSRegSet(notRestored) 1758 << " spilled at " << getBasicBlockName(MBB) 1759 << " are never restored on path to return " 1760 << getBasicBlockName(SBB) << "\n"; 1761 } 1762 restored.clear(); 1763 } 1764 } 1765 } 1766 1767 // Check restore placements. 1768 for (CSRegBlockMap::iterator BI = CSRRestore.begin(), 1769 BE = CSRRestore.end(); BI != BE; ++BI) { 1770 MachineBasicBlock* MBB = BI->first; 1771 CSRegSet restored = BI->second; 1772 CSRegSet spilled; 1773 1774 if (restored.empty()) 1775 continue; 1776 1777 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " 1778 << stringifyCSRegSet(CSRSave[MBB]) 1779 << " RESTORE[" << getBasicBlockName(MBB) << "] = " 1780 << stringifyCSRegSet(restored) << "\n"; 1781 1782 if (CSRSave[MBB].intersects(restored)) { 1783 spilled |= (CSRSave[MBB] & restored); 1784 } 1785 // Walk inverse depth first from MBB to find spills of all 1786 // CSRs restored at MBB: 1787 for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB), 1788 BE = idf_end(MBB); BI != BE; ++BI) { 1789 MachineBasicBlock* PBB = *BI; 1790 if (PBB == MBB) 1791 continue; 1792 // Stop when we encounter restores of any CSRs restored at MBB that 1793 // have not yet been seen to be spilled. 1794 if (CSRRestore[PBB].intersects(restored) && 1795 !spilled.contains(CSRRestore[PBB] & restored)) 1796 break; 1797 // Collect the CSRs restored at MBB that are spilled 1798 // at this DF predecessor of MBB. 1799 if (CSRSave[PBB].intersects(restored)) 1800 spilled |= (CSRSave[PBB] & restored); 1801 } 1802 if (spilled != restored) { 1803 CSRegSet notSpilled = (restored - spilled); 1804 DOUT << MF->getFunction()->getName() << ": " 1805 << stringifyCSRegSet(notSpilled) 1806 << " restored at " << getBasicBlockName(MBB) 1807 << " are never spilled\n"; 1808 } 1809 } 1810} 1811 1812// Debugging print methods. 1813std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { 1814 std::ostringstream name; 1815 if (MBB) { 1816 if (MBB->getBasicBlock()) 1817 name << MBB->getBasicBlock()->getName(); 1818 else 1819 name << "_MBB_" << MBB->getNumber(); 1820 } 1821 return name.str(); 1822} 1823 1824std::string PEI::stringifyCSRegSet(const CSRegSet& s) { 1825 const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); 1826 const std::vector<CalleeSavedInfo> CSI = 1827 MF->getFrameInfo()->getCalleeSavedInfo(); 1828 1829 std::ostringstream srep; 1830 if (CSI.size() == 0) { 1831 srep << "[]"; 1832 return srep.str(); 1833 } 1834 srep << "["; 1835 CSRegSet::iterator I = s.begin(), E = s.end(); 1836 if (I != E) { 1837 unsigned reg = CSI[*I].getReg(); 1838 srep << TRI->getName(reg); 1839 for (++I; I != E; ++I) { 1840 reg = CSI[*I].getReg(); 1841 srep << ","; 1842 srep << TRI->getName(reg); 1843 } 1844 } 1845 srep << "]"; 1846 return srep.str(); 1847} 1848 1849void PEI::dumpSet(const CSRegSet& s) { 1850 DOUT << stringifyCSRegSet(s) << "\n"; 1851} 1852 1853void PEI::dumpUsed(MachineBasicBlock* MBB) { 1854 if (MBB) { 1855 DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = " 1856 << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; 1857 } 1858} 1859 1860void PEI::dumpAllUsed() { 1861 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 1862 MBBI != MBBE; ++MBBI) { 1863 MachineBasicBlock* MBB = MBBI; 1864 dumpUsed(MBB); 1865 } 1866} 1867 1868void PEI::dumpSets(MachineBasicBlock* MBB) { 1869 if (MBB) { 1870 DOUT << getBasicBlockName(MBB) << " | " 1871 << stringifyCSRegSet(CSRUsed[MBB]) << " | " 1872 << stringifyCSRegSet(AnticIn[MBB]) << " | " 1873 << stringifyCSRegSet(AnticOut[MBB]) << " | " 1874 << stringifyCSRegSet(AvailIn[MBB]) << " | " 1875 << stringifyCSRegSet(AvailOut[MBB]) << "\n"; 1876 } 1877} 1878 1879void PEI::dumpSets1(MachineBasicBlock* MBB) { 1880 if (MBB) { 1881 DOUT << getBasicBlockName(MBB) << " | " 1882 << stringifyCSRegSet(CSRUsed[MBB]) << " | " 1883 << stringifyCSRegSet(AnticIn[MBB]) << " | " 1884 << stringifyCSRegSet(AnticOut[MBB]) << " | " 1885 << stringifyCSRegSet(AvailIn[MBB]) << " | " 1886 << stringifyCSRegSet(AvailOut[MBB]) << " | " 1887 << stringifyCSRegSet(CSRSave[MBB]) << " | " 1888 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; 1889 } 1890} 1891 1892void PEI::dumpAllSets() { 1893 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 1894 MBBI != MBBE; ++MBBI) { 1895 MachineBasicBlock* MBB = MBBI; 1896 dumpSets1(MBB); 1897 } 1898} 1899 1900void PEI::dumpSRSets() { 1901 for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); 1902 MBB != E; ++MBB) { 1903 if (! CSRSave[MBB].empty()) { 1904 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " 1905 << stringifyCSRegSet(CSRSave[MBB]); 1906 if (CSRRestore[MBB].empty()) 1907 DOUT << "\n"; 1908 } 1909 if (! CSRRestore[MBB].empty()) { 1910 if (! CSRSave[MBB].empty()) 1911 DOUT << " "; 1912 DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " 1913 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; 1914 } 1915 } 1916} 1917#endif 1918