PrologEpilogInserter.cpp revision b9cfbd94abb23ec8646b9b10aa4ac3d1cbf4461e
1//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass is responsible for finalizing the functions frame layout, saving 11// callee saved registers, and for emitting prolog & epilog code for the 12// function. 13// 14// This pass must be run after register allocation. After this pass is 15// executed, it is illegal to construct MO_FrameIndex operands. 16// 17// This pass implements a shrink wrapping variant of prolog/epilog insertion: 18// - Places callee saved register (CSR) spills and restores in the CFG to 19// tightly surround uses so that execution paths that do not use CSRs do not 20// pay the spill/restore penalty. 21// 22// - Avoiding placment of spills/restores in loops: if a CSR is used inside a 23// loop(nest), the spills are placed in the loop preheader, and restores are 24// placed in the loop exit nodes (the successors of the loop _exiting_ nodes). 25// 26// - Covering paths without CSR uses: e.g. if a restore is placed in a join 27// block, a matching spill is added to the end of all immediate predecessor 28// blocks that are not reached by a spill. Similarly for saves placed in 29// branch blocks. 30// 31// Shrink wrapping uses an analysis similar to the one in GVNPRE to determine 32// which basic blocks require callee-saved register save/restore code. 33// 34// This pass uses MachineDominators and MachineLoopInfo. Loop information 35// is used to prevent shrink wrapping of callee-saved register save/restore 36// code into loops. 37// 38//===----------------------------------------------------------------------===// 39 40#define DEBUG_TYPE "shrink-wrap" 41 42#include "llvm/CodeGen/Passes.h" 43#include "llvm/CodeGen/MachineDominators.h" 44#include "llvm/CodeGen/MachineLoopInfo.h" 45#include "llvm/CodeGen/MachineFunctionPass.h" 46#include "llvm/CodeGen/MachineInstr.h" 47#include "llvm/CodeGen/MachineFrameInfo.h" 48#include "llvm/CodeGen/MachineModuleInfo.h" 49#include "llvm/CodeGen/MachineRegisterInfo.h" 50#include "llvm/CodeGen/RegisterScavenging.h" 51#include "llvm/Target/TargetMachine.h" 52#include "llvm/Target/TargetRegisterInfo.h" 53#include "llvm/Target/TargetFrameInfo.h" 54#include "llvm/Target/TargetInstrInfo.h" 55#include "llvm/ADT/SparseBitVector.h" 56#include "llvm/ADT/DenseMap.h" 57#include "llvm/ADT/PostOrderIterator.h" 58#include "llvm/ADT/Statistic.h" 59#include "llvm/Support/CommandLine.h" 60#include "llvm/Support/Compiler.h" 61#include "llvm/Support/Debug.h" 62#include "llvm/ADT/STLExtras.h" 63#include "llvm/ADT/Statistic.h" 64#include <climits> 65#include <sstream> 66 67using namespace llvm; 68 69STATISTIC(numSRReduced, "Number of CSR spills+restores reduced."); 70 71// Shrink Wrapping: 72static cl::opt<bool> 73ShrinkWrapping("shrink-wrap", 74 cl::desc("Shrink wrap callee-saved register spills/restores")); 75 76// Shrink wrap only the specified function, a debugging aid. 77static cl::opt<std::string> 78ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, 79 cl::desc("Shrink wrap the specified function"), 80 cl::value_desc("funcname"), 81 cl::init("")); 82 83// Debugging level for shrink wrapping. 84enum ShrinkWrapDebugLevel { 85 None, BasicInfo, Iterations, Details 86}; 87 88static cl::opt<enum ShrinkWrapDebugLevel> 89ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, 90 cl::desc("Print shrink wrapping debugging information"), 91 cl::values( 92 clEnumVal(None , "disable debug output"), 93 clEnumVal(BasicInfo , "print basic DF sets"), 94 clEnumVal(Iterations, "print SR sets for each iteration"), 95 clEnumVal(Details , "print all DF sets"), 96 clEnumValEnd)); 97 98 99namespace { 100 struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass { 101 static char ID; 102 PEI() : MachineFunctionPass(&ID) {} 103 104 const char *getPassName() const { 105 return "Prolog/Epilog Insertion & Frame Finalization"; 106 } 107 108 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 109 AU.setPreservesCFG(); 110 if (ShrinkWrapping || ShrinkWrapFunc != "") { 111 AU.addRequired<MachineLoopInfo>(); 112 AU.addRequired<MachineDominatorTree>(); 113 } 114 AU.addPreserved<MachineLoopInfo>(); 115 AU.addPreserved<MachineDominatorTree>(); 116 MachineFunctionPass::getAnalysisUsage(AU); 117 } 118 119 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract 120 /// frame indexes with appropriate references. 121 /// 122 bool runOnMachineFunction(MachineFunction &Fn) { 123 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); 124 RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; 125 126 DEBUG(MF = &Fn); 127 128 // Get MachineModuleInfo so that we can track the construction of the 129 // frame. 130 if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>()) 131 Fn.getFrameInfo()->setMachineModuleInfo(MMI); 132 133 // Allow the target machine to make some adjustments to the function 134 // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. 135 TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); 136 137 // Scan the function for modified callee saved registers and insert spill 138 // code for any callee saved registers that are modified. Also calculate 139 // the MaxCallFrameSize and HasCalls variables for the function's frame 140 // information and eliminates call frame pseudo instructions. 141 calculateCalleeSavedRegisters(Fn); 142 143 // Determine placement of CSR spill/restore code: 144 // - with shrink wrapping, place spills and restores to tightly 145 // enclose regions in the Machine CFG of the function where 146 // they are used. Without shrink wrapping 147 // - default (no shrink wrapping), place all spills in the 148 // entry block, all restores in return blocks. 149 placeCSRSpillsAndRestores(Fn); 150 151 // Add the code to save and restore the callee saved registers 152 insertCSRSpillsAndRestores(Fn); 153 154 // Allow the target machine to make final modifications to the function 155 // before the frame layout is finalized. 156 TRI->processFunctionBeforeFrameFinalized(Fn); 157 158 // Calculate actual frame offsets for all abstract stack objects... 159 calculateFrameObjectOffsets(Fn); 160 161 // Add prolog and epilog code to the function. This function is required 162 // to align the stack frame as necessary for any stack variables or 163 // called functions. Because of this, calculateCalleeSavedRegisters 164 // must be called before this function in order to set the HasCalls 165 // and MaxCallFrameSize variables. 166 insertPrologEpilogCode(Fn); 167 168 // Replace all MO_FrameIndex operands with physical register references 169 // and actual offsets. 170 // 171 replaceFrameIndices(Fn); 172 173 delete RS; 174 clearAllSets(); 175 return true; 176 } 177 178 private: 179 RegScavenger *RS; 180 181 // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved 182 // stack frame indexes. 183 unsigned MinCSFrameIndex, MaxCSFrameIndex; 184 185 // Analysis info for spill/restore placement. 186 // "CSR": "callee saved register". 187 188 // CSRegSet contains indices into the Callee Saved Register Info 189 // vector built by calculateCalleeSavedRegisters() and accessed 190 // via MF.getFrameInfo()->getCalleeSavedInfo(). 191 typedef SparseBitVector<> CSRegSet; 192 193 // CSRegBlockMap maps MachineBasicBlocks to sets of callee 194 // saved register indices. 195 typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap; 196 197 // Set and maps for computing CSR spill/restore placement: 198 // used in function (UsedCSRegs) 199 // used in a basic block (CSRUsed) 200 // anticipatable in a basic block (Antic{In,Out}) 201 // available in a basic block (Avail{In,Out}) 202 // to be spilled at the entry to a basic block (CSRSave) 203 // to be restored at the end of a basic block (CSRRestore) 204 CSRegSet UsedCSRegs; 205 CSRegBlockMap CSRUsed; 206 CSRegBlockMap AnticIn, AnticOut; 207 CSRegBlockMap AvailIn, AvailOut; 208 CSRegBlockMap CSRSave; 209 CSRegBlockMap CSRRestore; 210 211 // Entry and return blocks of the current function. 212 MachineBasicBlock* EntryBlock; 213 SmallVector<MachineBasicBlock*, 4> ReturnBlocks; 214 215 // Map of MBBs to top level MachineLoops. 216 DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops; 217 218 // Flag to control shrink wrapping per-function: 219 // may choose to skip shrink wrapping for certain 220 // functions. 221 bool ShrinkWrapThisFunction; 222 223#ifndef NDEBUG 224 // Machine function handle. 225 MachineFunction* MF; 226 227 // Flag indicating that the current function 228 // has at least one "short" path in the machine 229 // CFG from the entry block to an exit block. 230 bool HasFastExitPath; 231#endif 232 233 bool calculateSets(MachineFunction &Fn); 234 bool calcAnticInOut(MachineBasicBlock* MBB); 235 bool calcAvailInOut(MachineBasicBlock* MBB); 236 void calculateAnticAvail(MachineFunction &Fn); 237 bool addUsesForMEMERegion(MachineBasicBlock* MBB, 238 SmallVector<MachineBasicBlock*, 4>& blks); 239 bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks); 240 bool calcSpillPlacements(MachineBasicBlock* MBB, 241 SmallVector<MachineBasicBlock*, 4> &blks, 242 CSRegBlockMap &prevSpills); 243 bool calcRestorePlacements(MachineBasicBlock* MBB, 244 SmallVector<MachineBasicBlock*, 4> &blks, 245 CSRegBlockMap &prevRestores); 246 void placeSpillsAndRestores(MachineFunction &Fn); 247 void placeCSRSpillsAndRestores(MachineFunction &Fn); 248 void calculateCalleeSavedRegisters(MachineFunction &Fn); 249 void insertCSRSpillsAndRestores(MachineFunction &Fn); 250 void calculateFrameObjectOffsets(MachineFunction &Fn); 251 void replaceFrameIndices(MachineFunction &Fn); 252 void insertPrologEpilogCode(MachineFunction &Fn); 253 254 // Initialize DFA sets, called before iterations. 255 void clearAnticAvailSets(); 256 // Clear all sets constructed by shrink wrapping. 257 void clearAllSets(); 258 259 // Initialize all shrink wrapping data. 260 void initShrinkWrappingInfo(); 261 262 // Convienences for dealing with machine loops. 263 MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP) { 264 assert(LP && "Machine loop is NULL."); 265 MachineBasicBlock* PHDR = LP->getLoopPreheader(); 266 MachineLoop* PLP = LP->getParentLoop(); 267 while (PLP) { 268 PHDR = PLP->getLoopPreheader(); 269 PLP = PLP->getParentLoop(); 270 } 271 return PHDR; 272 } 273 274 MachineLoop* getTopLevelLoopParent(MachineLoop *LP) { 275 if (LP == 0) 276 return 0; 277 MachineLoop* PLP = LP->getParentLoop(); 278 while (PLP) { 279 LP = PLP; 280 PLP = PLP->getParentLoop(); 281 } 282 return LP; 283 } 284 285 // Propgate CSRs used in MBB to all MBBs of loop LP. 286 void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP); 287 288 // Convenience for recognizing return blocks. 289 bool isReturnBlock(MachineBasicBlock* MBB) { 290 return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn()); 291 } 292 293#ifndef NDEBUG 294 // Debugging methods. 295 296 // Mark this function as having fast exit paths. 297 void findFastExitPath(); 298 299 // Verify placement of spills/restores. 300 void verifySpillRestorePlacement(); 301 302 std::string getBasicBlockName(const MachineBasicBlock* MBB); 303 std::string stringifyCSRegSet(const CSRegSet& s); 304 void dumpSet(const CSRegSet& s); 305 void dumpUsed(MachineBasicBlock* MBB); 306 void dumpAllUsed(); 307 void dumpSets(MachineBasicBlock* MBB); 308 void dumpSets1(MachineBasicBlock* MBB); 309 void dumpAllSets(); 310 void dumpSRSets(); 311#endif 312 313 }; 314 char PEI::ID = 0; 315} 316 317// Initialize shrink wrapping DFA sets, called before iterations. 318void PEI::clearAnticAvailSets() { 319 AnticIn.clear(); 320 AnticOut.clear(); 321 AvailIn.clear(); 322 AvailOut.clear(); 323} 324 325// Clear all sets constructed by shrink wrapping. 326void PEI::clearAllSets() { 327 ReturnBlocks.clear(); 328 clearAnticAvailSets(); 329 UsedCSRegs.clear(); 330 CSRUsed.clear(); 331 TLLoops.clear(); 332 CSRSave.clear(); 333 CSRRestore.clear(); 334} 335 336// Initialize all shrink wrapping data. 337void PEI::initShrinkWrappingInfo() { 338 clearAllSets(); 339 EntryBlock = 0; 340 HasFastExitPath = false; 341 ShrinkWrapThisFunction = ShrinkWrapping; 342 // DEBUG: enable or disable shrink wrapping for the current function 343 // via --shrink-wrap-func=<funcname>. 344#ifndef NDEBUG 345 if (ShrinkWrapFunc != "") { 346 std::string MFName = MF->getFunction()->getName(); 347 ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); 348 } 349#endif 350} 351 352 353/// createPrologEpilogCodeInserter - This function returns a pass that inserts 354/// prolog and epilog code, and eliminates abstract frame references. 355/// 356FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } 357 358/// placeCSRSpillsAndRestores - determine which MBBs of the function 359/// need save, restore code for callee-saved registers by doing a DF analysis 360/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs 361/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo 362/// is used to ensure that CSR save/restore code is not placed inside loops. 363/// This function computes the maps of MBBs -> CSRs to spill and restore 364/// in CSRSave, CSRRestore. 365/// 366/// If shrink wrapping is not being performed, place all spills in 367/// the entry block, all restores in return blocks. In this case, 368/// CSRSave has a single mapping, CSRRestore has mappings for each 369/// return block. 370/// 371void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { 372 373 initShrinkWrappingInfo(); 374 375 DEBUG(if (ShrinkWrapThisFunction) { 376 DOUT << "Place CSR spills/restores for " 377 << MF->getFunction()->getName() << "\n"; 378 }); 379 380 if (calculateSets(Fn)) 381 placeSpillsAndRestores(Fn); 382} 383 384/// calcAnticInOut - calculate the anticipated in/out reg sets 385/// for the given MBB by looking forward in the MCFG at MBB's 386/// successors. 387/// 388bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { 389 bool changed = false; 390 391 // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) 392 SmallVector<MachineBasicBlock*, 4> successors; 393 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 394 SE = MBB->succ_end(); SI != SE; ++SI) { 395 MachineBasicBlock* SUCC = *SI; 396 if (SUCC != MBB) 397 successors.push_back(SUCC); 398 } 399 400 unsigned i = 0, e = successors.size(); 401 if (i != e) { 402 CSRegSet prevAnticOut = AnticOut[MBB]; 403 MachineBasicBlock* SUCC = successors[i]; 404 405 AnticOut[MBB] = AnticIn[SUCC]; 406 for (++i; i != e; ++i) { 407 SUCC = successors[i]; 408 AnticOut[MBB] &= AnticIn[SUCC]; 409 } 410 if (prevAnticOut != AnticOut[MBB]) 411 changed = true; 412 } 413 414 // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); 415 CSRegSet prevAnticIn = AnticIn[MBB]; 416 AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; 417 if (prevAnticIn |= AnticIn[MBB]) 418 changed = true; 419 return changed; 420} 421 422/// calcAvailInOut - calculate the available in/out reg sets 423/// for the given MBB by looking backward in the MCFG at MBB's 424/// predecessors. 425/// 426bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { 427 bool changed = false; 428 429 // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) 430 SmallVector<MachineBasicBlock*, 4> predecessors; 431 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 432 PE = MBB->pred_end(); PI != PE; ++PI) { 433 MachineBasicBlock* PRED = *PI; 434 if (PRED != MBB) 435 predecessors.push_back(PRED); 436 } 437 438 unsigned i = 0, e = predecessors.size(); 439 if (i != e) { 440 CSRegSet prevAvailIn = AvailIn[MBB]; 441 MachineBasicBlock* PRED = predecessors[i]; 442 443 AvailIn[MBB] = AvailOut[PRED]; 444 for (++i; i != e; ++i) { 445 PRED = predecessors[i]; 446 AvailIn[MBB] &= AvailOut[PRED]; 447 } 448 if (prevAvailIn != AvailIn[MBB]) 449 changed = true; 450 } 451 452 // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); 453 CSRegSet prevAvailOut = AvailOut[MBB]; 454 AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; 455 if (prevAvailOut |= AvailOut[MBB]) 456 changed = true; 457 return changed; 458} 459 460/// calculateAnticAvail - build the sets anticipated and available 461/// registers in the MCFG of the current function iteratively, 462/// doing a combined forward and backward analysis. 463/// 464void PEI::calculateAnticAvail(MachineFunction &Fn) { 465 // Initialize data flow sets. 466 clearAnticAvailSets(); 467 468 // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. 469 bool changed = true; 470 unsigned iterations = 0; 471 while (changed) { 472 changed = false; 473 ++iterations; 474 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 475 MBBI != MBBE; ++MBBI) { 476 MachineBasicBlock* MBB = MBBI; 477 478 // Calculate anticipated in, out regs at MBB from 479 // anticipated at successors of MBB. 480 changed |= calcAnticInOut(MBB); 481 482 // Calculate available in, out regs at MBB from 483 // available at predecessors of MBB. 484 changed |= calcAvailInOut(MBB); 485 } 486 } 487 488 DEBUG(if (ShrinkWrapDebugging >= Details) { 489 DOUT << "-----------------------------------------------------------\n"; 490 DOUT << " Antic/Avail Sets:\n"; 491 DOUT << "-----------------------------------------------------------\n"; 492 DOUT << "iterations = " << iterations << "\n"; 493 DOUT << "-----------------------------------------------------------\n"; 494 DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"; 495 DOUT << "-----------------------------------------------------------\n"; 496 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 497 MBBI != MBBE; ++MBBI) { 498 MachineBasicBlock* MBB = MBBI; 499 dumpSets(MBB); 500 } 501 DOUT << "-----------------------------------------------------------\n"; 502 }); 503} 504 505/// propagateUsesAroundLoop - copy used register info from MBB to all blocks 506/// of the loop given by LP and its parent loops. This prevents spills/restores 507/// from being placed in the bodies of loops. 508/// 509void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { 510 if (! MBB || !LP) 511 return; 512 513 std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); 514 for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { 515 MachineBasicBlock* LBB = loopBlocks[i]; 516 if (LBB == MBB) 517 continue; 518 if (CSRUsed[LBB].contains(CSRUsed[MBB])) 519 continue; 520 CSRUsed[LBB] |= CSRUsed[MBB]; 521 } 522} 523 524/// calculateSets - collect the CSRs used in this function, compute 525/// the DF sets that describe the initial minimal regions in the 526/// Machine CFG around which CSR spills and restores must be placed. 527/// 528/// Additionally, this function decides if shrink wrapping should 529/// be disabled for the current function, checking the following: 530/// 1. the current function has more than 500 MBBs: heuristic limit 531/// on function size to reduce compile time impact of the current 532/// iterative algorithm. 533/// 2. all CSRs are used in the entry block. 534/// 3. all CSRs are used in all immediate successors of the entry block. 535/// 4. all CSRs are used in a subset of blocks, each of which dominates 536/// all return blocks. These blocks, taken as a subgraph of the MCFG, 537/// are equivalent to the entry block since all execution paths pass 538/// through them. 539/// 540bool PEI::calculateSets(MachineFunction &Fn) { 541 // Sets used to compute spill, restore placement sets. 542 const std::vector<CalleeSavedInfo> CSI = 543 Fn.getFrameInfo()->getCalleeSavedInfo(); 544 545 // If no CSRs used, we are done. 546 if (CSI.empty()) { 547 DEBUG(if (ShrinkWrapThisFunction) 548 DOUT << "DISABLED: " << Fn.getFunction()->getName() 549 << ": uses no callee-saved registers\n"); 550 return false; 551 } 552 553 // Save refs to entry and return blocks. 554 EntryBlock = Fn.begin(); 555 for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); 556 MBB != E; ++MBB) 557 if (isReturnBlock(MBB)) 558 ReturnBlocks.push_back(MBB); 559 560 // Determine if this function has fast exit paths. 561 DEBUG(if (ShrinkWrapThisFunction) 562 findFastExitPath()); 563 564 // Limit shrink wrapping via the current iterative bit vector 565 // implementation to functions with <= 500 MBBs. 566 if (Fn.size() > 500) { 567 DEBUG(if (ShrinkWrapThisFunction) 568 DOUT << "DISABLED: " << Fn.getFunction()->getName() 569 << ": too large (" << Fn.size() << " MBBs)\n"); 570 ShrinkWrapThisFunction = false; 571 } 572 573 // Return now if not shrink wrapping. 574 if (! ShrinkWrapThisFunction) 575 return false; 576 577 // Collect set of used CSRs. 578 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { 579 UsedCSRegs.set(inx); 580 } 581 582 // Walk instructions in all MBBs, create CSRUsed[] sets, choose 583 // whether or not to shrink wrap this function. 584 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); 585 MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); 586 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); 587 588 bool allCSRUsesInEntryBlock = true; 589 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 590 MBBI != MBBE; ++MBBI) { 591 MachineBasicBlock* MBB = MBBI; 592 for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { 593 for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { 594 unsigned Reg = CSI[inx].getReg(); 595 // If instruction I reads or modifies Reg, add it to UsedCSRegs, 596 // CSRUsed map for the current block. 597 for (unsigned opInx = 0, opEnd = I->getNumOperands(); 598 opInx != opEnd; ++opInx) { 599 const MachineOperand &MO = I->getOperand(opInx); 600 if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) 601 continue; 602 unsigned MOReg = MO.getReg(); 603 if (!MOReg) 604 continue; 605 if (MOReg == Reg || 606 (TargetRegisterInfo::isPhysicalRegister(MOReg) && 607 TargetRegisterInfo::isPhysicalRegister(Reg) && 608 TRI->isSubRegister(Reg, MOReg))) { 609 // CSR Reg is defined/used in block MBB. 610 CSRUsed[MBB].set(inx); 611 // Check for uses in EntryBlock. 612 if (MBB != EntryBlock) 613 allCSRUsesInEntryBlock = false; 614 } 615 } 616 } 617 } 618 619 if (CSRUsed[MBB].empty()) 620 continue; 621 622 // Propagate CSRUsed[MBB] in loops 623 if (MachineLoop* LP = LI.getLoopFor(MBB)) { 624 // Add top level loop to work list. 625 MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); 626 MachineLoop* PLP = getTopLevelLoopParent(LP); 627 628 if (! HDR) { 629 HDR = PLP->getHeader(); 630 assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); 631 MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); 632 HDR = *PI; 633 } 634 TLLoops[HDR] = PLP; 635 636 // Push uses from inside loop to its parent loops, 637 // or to all other MBBs in its loop. 638 if (LP->getLoopDepth() > 1) { 639 for (MachineLoop* PLP = LP->getParentLoop(); PLP; 640 PLP = PLP->getParentLoop()) { 641 propagateUsesAroundLoop(MBB, PLP); 642 } 643 } else { 644 propagateUsesAroundLoop(MBB, LP); 645 } 646 } 647 } 648 649 if (allCSRUsesInEntryBlock) { 650 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() 651 << ": all CSRs used in EntryBlock\n"); 652 ShrinkWrapThisFunction = false; 653 } else { 654 bool allCSRsUsedInEntryFanout = true; 655 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), 656 SE = EntryBlock->succ_end(); SI != SE; ++SI) { 657 MachineBasicBlock* SUCC = *SI; 658 if (CSRUsed[SUCC] != UsedCSRegs) 659 allCSRsUsedInEntryFanout = false; 660 } 661 if (allCSRsUsedInEntryFanout) { 662 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() 663 << ": all CSRs used in imm successors of EntryBlock\n"); 664 ShrinkWrapThisFunction = false; 665 } 666 } 667 668 if (ShrinkWrapThisFunction) { 669 // Check if MBB uses CSRs and dominates all exit nodes. 670 // Such nodes are equiv. to the entry node w.r.t. 671 // CSR uses: every path through the function must 672 // pass through this node. If each CSR is used at least 673 // once by these nodes, shrink wrapping is disabled. 674 CSRegSet CSRUsedInChokePoints; 675 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 676 MBBI != MBBE; ++MBBI) { 677 MachineBasicBlock* MBB = MBBI; 678 if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) 679 continue; 680 bool dominatesExitNodes = true; 681 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) 682 if (! DT.dominates(MBB, ReturnBlocks[ri])) { 683 dominatesExitNodes = false; 684 break; 685 } 686 if (dominatesExitNodes) { 687 CSRUsedInChokePoints |= CSRUsed[MBB]; 688 if (CSRUsedInChokePoints == UsedCSRegs) { 689 DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() 690 << ": all CSRs used in choke point(s) at " 691 << getBasicBlockName(MBB) << "\n"); 692 ShrinkWrapThisFunction = false; 693 break; 694 } 695 } 696 } 697 } 698 699 // Return now if we have decided not to apply shrink wrapping 700 // to the current function. 701 if (! ShrinkWrapThisFunction) 702 return false; 703 704 DEBUG({ 705 DOUT << "ENABLED: " << Fn.getFunction()->getName(); 706 if (HasFastExitPath) 707 DOUT << " (fast exit path)"; 708 DOUT << "\n"; 709 if (ShrinkWrapDebugging >= BasicInfo) { 710 DOUT << "------------------------------" 711 << "-----------------------------\n"; 712 DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; 713 if (ShrinkWrapDebugging >= Details) { 714 DOUT << "------------------------------" 715 << "-----------------------------\n"; 716 dumpAllUsed(); 717 } 718 } 719 }); 720 721 // Build initial DF sets to determine minimal regions in the 722 // Machine CFG around which CSRs must be spilled and restored. 723 calculateAnticAvail(Fn); 724 725 return true; 726} 727 728/// addUsesForMEMERegion - add uses of CSRs spilled or restored in 729/// multi-entry, multi-exit (MEME) regions so spill and restore 730/// placement will not break code that enters or leaves a 731/// shrink-wrapped region by inducing spills with no matching 732/// restores or restores with no matching spills. A MEME region 733/// is a subgraph of the MCFG with multiple entry edges, multiple 734/// exit edges, or both. This code propagates use information 735/// through the MCFG until all paths requiring spills and restores 736/// _outside_ the computed minimal placement regions have been covered. 737/// 738bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, 739 SmallVector<MachineBasicBlock*, 4>& blks) { 740 if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { 741 bool processThisBlock = false; 742 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 743 SE = MBB->succ_end(); SI != SE; ++SI) { 744 MachineBasicBlock* SUCC = *SI; 745 if (SUCC->pred_size() > 1) { 746 processThisBlock = true; 747 break; 748 } 749 } 750 if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { 751 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 752 PE = MBB->pred_end(); PI != PE; ++PI) { 753 MachineBasicBlock* PRED = *PI; 754 if (PRED->succ_size() > 1) { 755 processThisBlock = true; 756 break; 757 } 758 } 759 } 760 if (! processThisBlock) 761 return false; 762 } 763 764 CSRegSet prop; 765 if (!CSRSave[MBB].empty()) 766 prop = CSRSave[MBB]; 767 else if (!CSRRestore[MBB].empty()) 768 prop = CSRRestore[MBB]; 769 else 770 prop = CSRUsed[MBB]; 771 if (prop.empty()) 772 return false; 773 774 // Propagate selected bits to successors, predecessors of MBB. 775 bool addedUses = false; 776 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 777 SE = MBB->succ_end(); SI != SE; ++SI) { 778 MachineBasicBlock* SUCC = *SI; 779 // Self-loop 780 if (SUCC == MBB) 781 continue; 782 if (! CSRUsed[SUCC].contains(prop)) { 783 CSRUsed[SUCC] |= prop; 784 addedUses = true; 785 blks.push_back(SUCC); 786 DEBUG(if (ShrinkWrapDebugging >= Iterations) 787 DOUT << getBasicBlockName(MBB) 788 << "(" << stringifyCSRegSet(prop) << ")->" 789 << "successor " << getBasicBlockName(SUCC) << "\n"); 790 } 791 } 792 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 793 PE = MBB->pred_end(); PI != PE; ++PI) { 794 MachineBasicBlock* PRED = *PI; 795 // Self-loop 796 if (PRED == MBB) 797 continue; 798 if (! CSRUsed[PRED].contains(prop)) { 799 CSRUsed[PRED] |= prop; 800 addedUses = true; 801 blks.push_back(PRED); 802 DEBUG(if (ShrinkWrapDebugging >= Iterations) 803 DOUT << getBasicBlockName(MBB) 804 << "(" << stringifyCSRegSet(prop) << ")->" 805 << "predecessor " << getBasicBlockName(PRED) << "\n"); 806 } 807 } 808 return addedUses; 809} 810 811/// addUsesForTopLevelLoops - add uses for CSRs used inside top 812/// level loops to the exit blocks of those loops. 813/// 814bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { 815 bool addedUses = false; 816 817 // Place restores for top level loops where needed. 818 for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator 819 I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { 820 MachineBasicBlock* MBB = I->first; 821 MachineLoop* LP = I->second; 822 MachineBasicBlock* HDR = LP->getHeader(); 823 SmallVector<MachineBasicBlock*, 4> exitBlocks; 824 CSRegSet loopSpills; 825 826 loopSpills = CSRSave[MBB]; 827 if (CSRSave[MBB].empty()) { 828 loopSpills = CSRUsed[HDR]; 829 assert(!loopSpills.empty() && "No CSRs used in loop?"); 830 } else if (CSRRestore[MBB].contains(CSRSave[MBB])) 831 continue; 832 833 LP->getExitBlocks(exitBlocks); 834 assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); 835 for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { 836 MachineBasicBlock* EXB = exitBlocks[i]; 837 if (! CSRUsed[EXB].contains(loopSpills)) { 838 CSRUsed[EXB] |= loopSpills; 839 addedUses = true; 840 DEBUG(if (ShrinkWrapDebugging >= Iterations) 841 DOUT << "LOOP " << getBasicBlockName(MBB) 842 << "(" << stringifyCSRegSet(loopSpills) << ")->" 843 << getBasicBlockName(EXB) << "\n"); 844 if (EXB->succ_size() > 1 || EXB->pred_size() > 1) 845 blks.push_back(EXB); 846 } 847 } 848 } 849 return addedUses; 850} 851 852/// calcSpillPlacements - determine which CSRs should be spilled 853/// in MBB using AnticIn sets of MBB's predecessors, keeping track 854/// of changes to spilled reg sets. Add MBB to the set of blocks 855/// that need to be processed for propagating use info to cover 856/// multi-entry/exit regions. 857/// 858bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, 859 SmallVector<MachineBasicBlock*, 4> &blks, 860 CSRegBlockMap &prevSpills) { 861 bool placedSpills = false; 862 // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) 863 CSRegSet anticInPreds; 864 SmallVector<MachineBasicBlock*, 4> predecessors; 865 for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), 866 PE = MBB->pred_end(); PI != PE; ++PI) { 867 MachineBasicBlock* PRED = *PI; 868 if (PRED != MBB) 869 predecessors.push_back(PRED); 870 } 871 unsigned i = 0, e = predecessors.size(); 872 if (i != e) { 873 MachineBasicBlock* PRED = predecessors[i]; 874 anticInPreds = UsedCSRegs - AnticIn[PRED]; 875 for (++i; i != e; ++i) { 876 PRED = predecessors[i]; 877 anticInPreds &= (UsedCSRegs - AnticIn[PRED]); 878 } 879 } else { 880 // Handle uses in entry blocks (which have no predecessors). 881 // This is necessary because the DFA formulation assumes the 882 // entry and (multiple) exit nodes cannot have CSR uses, which 883 // is not the case in the real world. 884 anticInPreds = UsedCSRegs; 885 } 886 // Compute spills required at MBB: 887 CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; 888 889 if (! CSRSave[MBB].empty()) { 890 if (MBB == EntryBlock) { 891 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) 892 CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; 893 } else { 894 // Reset all regs spilled in MBB that are also spilled in EntryBlock. 895 if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { 896 CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; 897 } 898 } 899 } 900 placedSpills = (CSRSave[MBB] != prevSpills[MBB]); 901 prevSpills[MBB] = CSRSave[MBB]; 902 // Remember this block for adding restores to successor 903 // blocks for multi-entry region. 904 if (placedSpills) 905 blks.push_back(MBB); 906 907 DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) 908 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " 909 << stringifyCSRegSet(CSRSave[MBB]) << "\n"); 910 911 return placedSpills; 912} 913 914/// calcRestorePlacements - determine which CSRs should be restored 915/// in MBB using AvailOut sets of MBB's succcessors, keeping track 916/// of changes to restored reg sets. Add MBB to the set of blocks 917/// that need to be processed for propagating use info to cover 918/// multi-entry/exit regions. 919/// 920bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, 921 SmallVector<MachineBasicBlock*, 4> &blks, 922 CSRegBlockMap &prevRestores) { 923 bool placedRestores = false; 924 // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) 925 CSRegSet availOutSucc; 926 SmallVector<MachineBasicBlock*, 4> successors; 927 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 928 SE = MBB->succ_end(); SI != SE; ++SI) { 929 MachineBasicBlock* SUCC = *SI; 930 if (SUCC != MBB) 931 successors.push_back(SUCC); 932 } 933 unsigned i = 0, e = successors.size(); 934 if (i != e) { 935 MachineBasicBlock* SUCC = successors[i]; 936 availOutSucc = UsedCSRegs - AvailOut[SUCC]; 937 for (++i; i != e; ++i) { 938 SUCC = successors[i]; 939 availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); 940 } 941 } else { 942 if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { 943 // Handle uses in return blocks (which have no successors). 944 // This is necessary because the DFA formulation assumes the 945 // entry and (multiple) exit nodes cannot have CSR uses, which 946 // is not the case in the real world. 947 availOutSucc = UsedCSRegs; 948 } 949 } 950 // Compute restores required at MBB: 951 CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; 952 953 // Postprocess restore placements at MBB. 954 // Remove the CSRs that are restored in the return blocks. 955 // Lest this be confusing, note that: 956 // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. 957 if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { 958 if (! CSRSave[EntryBlock].empty()) 959 CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; 960 } 961 placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); 962 prevRestores[MBB] = CSRRestore[MBB]; 963 // Remember this block for adding saves to predecessor 964 // blocks for multi-entry region. 965 if (placedRestores) 966 blks.push_back(MBB); 967 968 DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) 969 DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " 970 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); 971 972 return placedRestores; 973} 974 975/// placeSpillsAndRestores - place spills and restores of CSRs 976/// used in MBBs in minimal regions that contain the uses. 977/// 978void PEI::placeSpillsAndRestores(MachineFunction &Fn) { 979 CSRegBlockMap prevCSRSave; 980 CSRegBlockMap prevCSRRestore; 981 SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks; 982 bool changed = true; 983 unsigned iterations = 0; 984 985 // Iterate computation of spill and restore placements in the MCFG until: 986 // 1. CSR use info has been fully propagated around the MCFG, and 987 // 2. computation of CSRSave[], CSRRestore[] reach fixed points. 988 while (changed) { 989 changed = false; 990 ++iterations; 991 992 DEBUG(if (ShrinkWrapDebugging >= Iterations) 993 DOUT << "iter " << iterations 994 << " --------------------------------------------------\n"); 995 996 // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, 997 // which determines the placements of spills and restores. 998 // Keep track of changes to spills, restores in each iteration to 999 // minimize the total iterations. 1000 bool SRChanged = false; 1001 for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); 1002 MBBI != MBBE; ++MBBI) { 1003 MachineBasicBlock* MBB = MBBI; 1004 1005 // Place spills for CSRs in MBB. 1006 SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); 1007 1008 // Place restores for CSRs in MBB. 1009 SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); 1010 } 1011 1012 // Add uses of CSRs used inside loops where needed. 1013 changed |= addUsesForTopLevelLoops(cvBlocks); 1014 1015 // Add uses for CSRs spilled or restored at branch, join points. 1016 if (changed || SRChanged) { 1017 while (! cvBlocks.empty()) { 1018 MachineBasicBlock* MBB = cvBlocks.pop_back_val(); 1019 changed |= addUsesForMEMERegion(MBB, ncvBlocks); 1020 } 1021 if (! ncvBlocks.empty()) { 1022 cvBlocks = ncvBlocks; 1023 ncvBlocks.clear(); 1024 } 1025 } 1026 1027 if (changed) { 1028 calculateAnticAvail(Fn); 1029 CSRSave.clear(); 1030 CSRRestore.clear(); 1031 } 1032 } 1033 1034 // Check for effectiveness: 1035 // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} 1036 // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] 1037 // Gives a measure of how many CSR spills have been moved from EntryBlock 1038 // to minimal regions enclosing their uses. 1039 CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); 1040 unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); 1041 numSRReduced += numSRReducedThisFunc; 1042 DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { 1043 DOUT << "-----------------------------------------------------------\n"; 1044 DOUT << "total iterations = " << iterations << " ( " 1045 << Fn.getFunction()->getName() 1046 << " " << numSRReducedThisFunc 1047 << " " << Fn.size() 1048 << " )\n"; 1049 DOUT << "-----------------------------------------------------------\n"; 1050 dumpSRSets(); 1051 DOUT << "-----------------------------------------------------------\n"; 1052 if (numSRReducedThisFunc) 1053 verifySpillRestorePlacement(); 1054 }); 1055} 1056 1057/// calculateCalleeSavedRegisters - Scan the function for modified callee saved 1058/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for 1059/// the function's frame information and eliminates call frame pseudo 1060/// instructions. 1061/// 1062void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { 1063 const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); 1064 const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); 1065 1066 // Get the callee saved register list... 1067 const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); 1068 1069 // Get the function call frame set-up and tear-down instruction opcode 1070 int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); 1071 int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); 1072 1073 // These are used to keep track the callee-save area. Initialize them. 1074 MinCSFrameIndex = INT_MAX; 1075 MaxCSFrameIndex = 0; 1076 1077 // Early exit for targets which have no callee saved registers and no call 1078 // frame setup/destroy pseudo instructions. 1079 if ((CSRegs == 0 || CSRegs[0] == 0) && 1080 FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) 1081 return; 1082 1083 unsigned MaxCallFrameSize = 0; 1084 bool HasCalls = false; 1085 1086 std::vector<MachineBasicBlock::iterator> FrameSDOps; 1087 for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) 1088 for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) 1089 if (I->getOpcode() == FrameSetupOpcode || 1090 I->getOpcode() == FrameDestroyOpcode) { 1091 assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo" 1092 " instructions should have a single immediate argument!"); 1093 unsigned Size = I->getOperand(0).getImm(); 1094 if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; 1095 HasCalls = true; 1096 FrameSDOps.push_back(I); 1097 } 1098 1099 MachineFrameInfo *FFI = Fn.getFrameInfo(); 1100 FFI->setHasCalls(HasCalls); 1101 FFI->setMaxCallFrameSize(MaxCallFrameSize); 1102 1103 for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) { 1104 MachineBasicBlock::iterator I = FrameSDOps[i]; 1105 // If call frames are not being included as part of the stack frame, 1106 // and there is no dynamic allocation (therefore referencing frame slots 1107 // off sp), leave the pseudo ops alone. We'll eliminate them later. 1108 if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn)) 1109 RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); 1110 } 1111 1112 // Now figure out which *callee saved* registers are modified by the current 1113 // function, thus needing to be saved and restored in the prolog/epilog. 1114 // 1115 const TargetRegisterClass* const *CSRegClasses = 1116 RegInfo->getCalleeSavedRegClasses(&Fn); 1117 std::vector<CalleeSavedInfo> CSI; 1118 for (unsigned i = 0; CSRegs[i]; ++i) { 1119 unsigned Reg = CSRegs[i]; 1120 if (Fn.getRegInfo().isPhysRegUsed(Reg)) { 1121 // If the reg is modified, save it! 1122 CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); 1123 } else { 1124 for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); 1125 *AliasSet; ++AliasSet) { // Check alias registers too. 1126 if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { 1127 CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); 1128 break; 1129 } 1130 } 1131 } 1132 } 1133 1134 if (CSI.empty()) 1135 return; // Early exit if no callee saved registers are modified! 1136 1137 unsigned NumFixedSpillSlots; 1138 const std::pair<unsigned,int> *FixedSpillSlots = 1139 TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); 1140 1141 // Now that we know which registers need to be saved and restored, allocate 1142 // stack slots for them. 1143 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1144 unsigned Reg = CSI[i].getReg(); 1145 const TargetRegisterClass *RC = CSI[i].getRegClass(); 1146 1147 // Check to see if this physreg must be spilled to a particular stack slot 1148 // on this target. 1149 const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots; 1150 while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && 1151 FixedSlot->first != Reg) 1152 ++FixedSlot; 1153 1154 int FrameIdx; 1155 if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) { 1156 // Nope, just spill it anywhere convenient. 1157 unsigned Align = RC->getAlignment(); 1158 unsigned StackAlign = TFI->getStackAlignment(); 1159 // We may not be able to sastify the desired alignment specification of 1160 // the TargetRegisterClass if the stack alignment is smaller. 1161 // Use the min. 1162 Align = std::min(Align, StackAlign); 1163 FrameIdx = FFI->CreateStackObject(RC->getSize(), Align); 1164 if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; 1165 if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; 1166 } else { 1167 // Spill it to the stack where we must. 1168 FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second); 1169 } 1170 CSI[i].setFrameIdx(FrameIdx); 1171 } 1172 1173 FFI->setCalleeSavedInfo(CSI); 1174} 1175 1176/// insertCSRSpillsAndRestores - Insert spill and restore code for 1177/// callee saved registers used in the function, handling shrink wrapping. 1178/// 1179void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { 1180 // Get callee saved register information. 1181 MachineFrameInfo *FFI = Fn.getFrameInfo(); 1182 const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); 1183 1184 // Early exit if no callee saved registers are modified! 1185 if (CSI.empty()) 1186 return; 1187 1188 const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); 1189 MachineBasicBlock::iterator I; 1190 1191 DEBUG(if (ShrinkWrapThisFunction && ShrinkWrapDebugging >= Details) 1192 DOUT << "Inserting CSR spills/restores in function " 1193 << Fn.getFunction()->getName() << "\n"); 1194 1195 if (! ShrinkWrapThisFunction) { 1196 // Spill using target interface. 1197 I = EntryBlock->begin(); 1198 if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) { 1199 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1200 // Add the callee-saved register as live-in. 1201 // It's killed at the spill. 1202 EntryBlock->addLiveIn(CSI[i].getReg()); 1203 1204 // Insert the spill to the stack frame. 1205 TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true, 1206 CSI[i].getFrameIdx(), CSI[i].getRegClass()); 1207 } 1208 } 1209 1210 // Restore using target interface. 1211 for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { 1212 MachineBasicBlock* MBB = ReturnBlocks[ri]; 1213 I = MBB->end(); --I; 1214 1215 // Skip over all terminator instructions, which are part of the return 1216 // sequence. 1217 MachineBasicBlock::iterator I2 = I; 1218 while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) 1219 I = I2; 1220 1221 bool AtStart = I == MBB->begin(); 1222 MachineBasicBlock::iterator BeforeI = I; 1223 if (!AtStart) 1224 --BeforeI; 1225 1226 // Restore all registers immediately before the return and any 1227 // terminators that preceed it. 1228 if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) { 1229 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1230 TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(), 1231 CSI[i].getFrameIdx(), 1232 CSI[i].getRegClass()); 1233 assert(I != MBB->begin() && 1234 "loadRegFromStackSlot didn't insert any code!"); 1235 // Insert in reverse order. loadRegFromStackSlot can insert 1236 // multiple instructions. 1237 if (AtStart) 1238 I = MBB->begin(); 1239 else { 1240 I = BeforeI; 1241 ++I; 1242 } 1243 } 1244 } 1245 } 1246 return; 1247 } 1248 1249 // Insert spills. 1250 std::vector<CalleeSavedInfo> blockCSI; 1251 for (CSRegBlockMap::iterator BI = CSRSave.begin(), 1252 BE = CSRSave.end(); BI != BE; ++BI) { 1253 MachineBasicBlock* MBB = BI->first; 1254 CSRegSet save = BI->second; 1255 1256 if (save.empty()) 1257 continue; 1258 1259 DEBUG(if (ShrinkWrapDebugging >= Details) 1260 DOUT << "Spilling " << stringifyCSRegSet(save) 1261 << " in " << getBasicBlockName(MBB) << "\n"); 1262 1263 blockCSI.clear(); 1264 for (CSRegSet::iterator RI = save.begin(), 1265 RE = save.end(); RI != RE; ++RI) { 1266 blockCSI.push_back(CSI[*RI]); 1267 } 1268 assert(blockCSI.size() > 0 && 1269 "Could not collect callee saved register info"); 1270 1271 I = MBB->begin(); 1272 1273 // When shrink wrapping, use stack slot stores/loads. 1274 for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { 1275 // Add the callee-saved register as live-in. 1276 // It's killed at the spill. 1277 MBB->addLiveIn(blockCSI[i].getReg()); 1278 1279 // Insert the spill to the stack frame. 1280 TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(), 1281 true, 1282 blockCSI[i].getFrameIdx(), 1283 blockCSI[i].getRegClass()); 1284 } 1285 } 1286 1287 DEBUG(if (ShrinkWrapDebugging >= Details) 1288 DOUT << "------------------------------" 1289 << "-----------------------------\n"); 1290 1291 for (CSRegBlockMap::iterator BI = CSRRestore.begin(), 1292 BE = CSRRestore.end(); BI != BE; ++BI) { 1293 MachineBasicBlock* MBB = BI->first; 1294 CSRegSet restore = BI->second; 1295 1296 if (restore.empty()) 1297 continue; 1298 1299 DEBUG(if (ShrinkWrapDebugging >= Details) 1300 DOUT << "Restoring " << stringifyCSRegSet(restore) 1301 << " in " << getBasicBlockName(MBB) << "\n"); 1302 1303 blockCSI.clear(); 1304 for (CSRegSet::iterator RI = restore.begin(), 1305 RE = restore.end(); RI != RE; ++RI) { 1306 blockCSI.push_back(CSI[*RI]); 1307 } 1308 assert(blockCSI.size() > 0 && 1309 "Could not find callee saved register info"); 1310 1311 // If MBB is empty and needs restores, insert at the _beginning_. 1312 if (MBB->empty()) { 1313 I = MBB->begin(); 1314 } else { 1315 I = MBB->end(); 1316 --I; 1317 1318 // Skip over all terminator instructions, which are part of the 1319 // return sequence. 1320 if (! I->getDesc().isTerminator()) { 1321 ++I; 1322 } else { 1323 MachineBasicBlock::iterator I2 = I; 1324 while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) 1325 I = I2; 1326 } 1327 } 1328 1329 bool AtStart = I == MBB->begin(); 1330 MachineBasicBlock::iterator BeforeI = I; 1331 if (!AtStart) 1332 --BeforeI; 1333 1334 // Restore all registers immediately before the return and any 1335 // terminators that preceed it. 1336 for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { 1337 TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(), 1338 blockCSI[i].getFrameIdx(), 1339 blockCSI[i].getRegClass()); 1340 assert(I != MBB->begin() && 1341 "loadRegFromStackSlot didn't insert any code!"); 1342 // Insert in reverse order. loadRegFromStackSlot can insert 1343 // multiple instructions. 1344 if (AtStart) 1345 I = MBB->begin(); 1346 else { 1347 I = BeforeI; 1348 ++I; 1349 } 1350 } 1351 } 1352 1353 DEBUG(if (ShrinkWrapDebugging >= Details) 1354 DOUT << "------------------------------" 1355 << "-----------------------------\n"); 1356} 1357 1358/// AdjustStackOffset - Helper function used to adjust the stack frame offset. 1359static inline void 1360AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, 1361 bool StackGrowsDown, int64_t &Offset, 1362 unsigned &MaxAlign) { 1363 // If stack grows down, we need to add size of find the lowest address of the 1364 // object. 1365 if (StackGrowsDown) 1366 Offset += FFI->getObjectSize(FrameIdx); 1367 1368 unsigned Align = FFI->getObjectAlignment(FrameIdx); 1369 1370 // If the alignment of this object is greater than that of the stack, then 1371 // increase the stack alignment to match. 1372 MaxAlign = std::max(MaxAlign, Align); 1373 1374 // Adjust to alignment boundary. 1375 Offset = (Offset + Align - 1) / Align * Align; 1376 1377 if (StackGrowsDown) { 1378 FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset 1379 } else { 1380 FFI->setObjectOffset(FrameIdx, Offset); 1381 Offset += FFI->getObjectSize(FrameIdx); 1382 } 1383} 1384 1385/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the 1386/// abstract stack objects. 1387/// 1388void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { 1389 const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); 1390 1391 bool StackGrowsDown = 1392 TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; 1393 1394 // Loop over all of the stack objects, assigning sequential addresses... 1395 MachineFrameInfo *FFI = Fn.getFrameInfo(); 1396 1397 unsigned MaxAlign = FFI->getMaxAlignment(); 1398 1399 // Start at the beginning of the local area. 1400 // The Offset is the distance from the stack top in the direction 1401 // of stack growth -- so it's always nonnegative. 1402 int64_t Offset = TFI.getOffsetOfLocalArea(); 1403 if (StackGrowsDown) 1404 Offset = -Offset; 1405 assert(Offset >= 0 1406 && "Local area offset should be in direction of stack growth"); 1407 1408 // If there are fixed sized objects that are preallocated in the local area, 1409 // non-fixed objects can't be allocated right at the start of local area. 1410 // We currently don't support filling in holes in between fixed sized 1411 // objects, so we adjust 'Offset' to point to the end of last fixed sized 1412 // preallocated object. 1413 for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { 1414 int64_t FixedOff; 1415 if (StackGrowsDown) { 1416 // The maximum distance from the stack pointer is at lower address of 1417 // the object -- which is given by offset. For down growing stack 1418 // the offset is negative, so we negate the offset to get the distance. 1419 FixedOff = -FFI->getObjectOffset(i); 1420 } else { 1421 // The maximum distance from the start pointer is at the upper 1422 // address of the object. 1423 FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i); 1424 } 1425 if (FixedOff > Offset) Offset = FixedOff; 1426 } 1427 1428 // First assign frame offsets to stack objects that are used to spill 1429 // callee saved registers. 1430 if (StackGrowsDown) { 1431 for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { 1432 // If stack grows down, we need to add size of find the lowest 1433 // address of the object. 1434 Offset += FFI->getObjectSize(i); 1435 1436 unsigned Align = FFI->getObjectAlignment(i); 1437 // If the alignment of this object is greater than that of the stack, 1438 // then increase the stack alignment to match. 1439 MaxAlign = std::max(MaxAlign, Align); 1440 // Adjust to alignment boundary 1441 Offset = (Offset+Align-1)/Align*Align; 1442 1443 FFI->setObjectOffset(i, -Offset); // Set the computed offset 1444 } 1445 } else { 1446 int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; 1447 for (int i = MaxCSFI; i >= MinCSFI ; --i) { 1448 unsigned Align = FFI->getObjectAlignment(i); 1449 // If the alignment of this object is greater than that of the stack, 1450 // then increase the stack alignment to match. 1451 MaxAlign = std::max(MaxAlign, Align); 1452 // Adjust to alignment boundary 1453 Offset = (Offset+Align-1)/Align*Align; 1454 1455 FFI->setObjectOffset(i, Offset); 1456 Offset += FFI->getObjectSize(i); 1457 } 1458 } 1459 1460 // Make sure the special register scavenging spill slot is closest to the 1461 // frame pointer if a frame pointer is required. 1462 const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); 1463 if (RS && RegInfo->hasFP(Fn)) { 1464 int SFI = RS->getScavengingFrameIndex(); 1465 if (SFI >= 0) 1466 AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); 1467 } 1468 1469 // Make sure that the stack protector comes before the local variables on the 1470 // stack. 1471 if (FFI->getStackProtectorIndex() >= 0) 1472 AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown, 1473 Offset, MaxAlign); 1474 1475 // Then assign frame offsets to stack objects that are not used to spill 1476 // callee saved registers. 1477 for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { 1478 if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) 1479 continue; 1480 if (RS && (int)i == RS->getScavengingFrameIndex()) 1481 continue; 1482 if (FFI->isDeadObjectIndex(i)) 1483 continue; 1484 if (FFI->getStackProtectorIndex() == (int)i) 1485 continue; 1486 1487 AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign); 1488 } 1489 1490 // Make sure the special register scavenging spill slot is closest to the 1491 // stack pointer. 1492 if (RS && !RegInfo->hasFP(Fn)) { 1493 int SFI = RS->getScavengingFrameIndex(); 1494 if (SFI >= 0) 1495 AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); 1496 } 1497 1498 // Round up the size to a multiple of the alignment, but only if there are 1499 // calls or alloca's in the function. This ensures that any calls to 1500 // subroutines have their stack frames suitable aligned. 1501 // Also do this if we need runtime alignment of the stack. In this case 1502 // offsets will be relative to SP not FP; round up the stack size so this 1503 // works. 1504 if (!RegInfo->targetHandlesStackFrameRounding() && 1505 (FFI->hasCalls() || FFI->hasVarSizedObjects() || 1506 (RegInfo->needsStackRealignment(Fn) && 1507 FFI->getObjectIndexEnd() != 0))) { 1508 // If we have reserved argument space for call sites in the function 1509 // immediately on entry to the current function, count it as part of the 1510 // overall stack size. 1511 if (RegInfo->hasReservedCallFrame(Fn)) 1512 Offset += FFI->getMaxCallFrameSize(); 1513 1514 unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1; 1515 Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); 1516 } 1517 1518 // Update frame info to pretend that this is part of the stack... 1519 FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea()); 1520 1521 // Remember the required stack alignment in case targets need it to perform 1522 // dynamic stack alignment. 1523 FFI->setMaxAlignment(MaxAlign); 1524} 1525 1526 1527/// insertPrologEpilogCode - Scan the function for modified callee saved 1528/// registers, insert spill code for these callee saved registers, then add 1529/// prolog and epilog code to the function. 1530/// 1531void PEI::insertPrologEpilogCode(MachineFunction &Fn) { 1532 const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); 1533 1534 // Add prologue to the function... 1535 TRI->emitPrologue(Fn); 1536 1537 // Add epilogue to restore the callee-save registers in each exiting block 1538 for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { 1539 // If last instruction is a return instruction, add an epilogue 1540 if (!I->empty() && I->back().getDesc().isReturn()) 1541 TRI->emitEpilogue(Fn, *I); 1542 } 1543} 1544 1545 1546/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical 1547/// register references and actual offsets. 1548/// 1549void PEI::replaceFrameIndices(MachineFunction &Fn) { 1550 if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? 1551 1552 const TargetMachine &TM = Fn.getTarget(); 1553 assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); 1554 const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); 1555 const TargetFrameInfo *TFI = TM.getFrameInfo(); 1556 bool StackGrowsDown = 1557 TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; 1558 int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); 1559 int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); 1560 1561 for (MachineFunction::iterator BB = Fn.begin(), 1562 E = Fn.end(); BB != E; ++BB) { 1563 int SPAdj = 0; // SP offset due to call frame setup / destroy. 1564 if (RS) RS->enterBasicBlock(BB); 1565 1566 for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { 1567 if (I->getOpcode() == TargetInstrInfo::DECLARE) { 1568 // Ignore it. 1569 ++I; 1570 continue; 1571 } 1572 1573 if (I->getOpcode() == FrameSetupOpcode || 1574 I->getOpcode() == FrameDestroyOpcode) { 1575 // Remember how much SP has been adjusted to create the call 1576 // frame. 1577 int Size = I->getOperand(0).getImm(); 1578 1579 if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || 1580 (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) 1581 Size = -Size; 1582 1583 SPAdj += Size; 1584 1585 MachineBasicBlock::iterator PrevI = BB->end(); 1586 if (I != BB->begin()) PrevI = prior(I); 1587 TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); 1588 1589 // Visit the instructions created by eliminateCallFramePseudoInstr(). 1590 if (PrevI == BB->end()) 1591 I = BB->begin(); // The replaced instr was the first in the block. 1592 else 1593 I = next(PrevI); 1594 continue; 1595 } 1596 1597 MachineInstr *MI = I; 1598 bool DoIncr = true; 1599 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) 1600 if (MI->getOperand(i).isFI()) { 1601 // Some instructions (e.g. inline asm instructions) can have 1602 // multiple frame indices and/or cause eliminateFrameIndex 1603 // to insert more than one instruction. We need the register 1604 // scavenger to go through all of these instructions so that 1605 // it can update its register information. We keep the 1606 // iterator at the point before insertion so that we can 1607 // revisit them in full. 1608 bool AtBeginning = (I == BB->begin()); 1609 if (!AtBeginning) --I; 1610 1611 // If this instruction has a FrameIndex operand, we need to 1612 // use that target machine register info object to eliminate 1613 // it. 1614 1615 TRI.eliminateFrameIndex(MI, SPAdj, RS); 1616 1617 // Reset the iterator if we were at the beginning of the BB. 1618 if (AtBeginning) { 1619 I = BB->begin(); 1620 DoIncr = false; 1621 } 1622 1623 MI = 0; 1624 break; 1625 } 1626 1627 if (DoIncr && I != BB->end()) ++I; 1628 1629 // Update register states. 1630 if (RS && MI) RS->forward(MI); 1631 } 1632 1633 assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); 1634 } 1635} 1636 1637// Debugging methods for shrink wrapping. 1638#ifndef NDEBUG 1639/// findFastExitPath - debugging method used to detect functions 1640/// with at least one path from the entry block to a return block 1641/// directly or which has a very small number of edges. 1642/// 1643void PEI::findFastExitPath() { 1644 if (! EntryBlock) 1645 return; 1646 // Fina a path from EntryBlock to any return block that does not branch: 1647 // Entry 1648 // | ... 1649 // v | 1650 // B1<-----+ 1651 // | 1652 // v 1653 // Return 1654 for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), 1655 SE = EntryBlock->succ_end(); SI != SE; ++SI) { 1656 MachineBasicBlock* SUCC = *SI; 1657 1658 // Assume positive, disprove existence of fast path. 1659 HasFastExitPath = true; 1660 1661 // Check the immediate successors. 1662 if (isReturnBlock(SUCC)) { 1663 if (ShrinkWrapDebugging >= BasicInfo) 1664 DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) 1665 << "->" << getBasicBlockName(SUCC) << "\n"; 1666 break; 1667 } 1668 // Traverse df from SUCC, look for a branch block. 1669 std::string exitPath = getBasicBlockName(SUCC); 1670 for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC), 1671 BE = df_end(SUCC); BI != BE; ++BI) { 1672 MachineBasicBlock* SBB = *BI; 1673 // Reject paths with branch nodes. 1674 if (SBB->succ_size() > 1) { 1675 HasFastExitPath = false; 1676 break; 1677 } 1678 exitPath += "->" + getBasicBlockName(SBB); 1679 } 1680 if (HasFastExitPath) { 1681 if (ShrinkWrapDebugging >= BasicInfo) 1682 DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) 1683 << "->" << exitPath << "\n"; 1684 break; 1685 } 1686 } 1687} 1688 1689/// verifySpillRestorePlacement - check the current spill/restore 1690/// sets for safety. Attempt to find spills without restores or 1691/// restores without spills. 1692/// Spills: walk df from each MBB in spill set ensuring that 1693/// all CSRs spilled at MMBB are restored on all paths 1694/// from MBB to all exit blocks. 1695/// Restores: walk idf from each MBB in restore set ensuring that 1696/// all CSRs restored at MBB are spilled on all paths 1697/// reaching MBB. 1698/// 1699void PEI::verifySpillRestorePlacement() { 1700 unsigned numReturnBlocks = 0; 1701 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 1702 MBBI != MBBE; ++MBBI) { 1703 MachineBasicBlock* MBB = MBBI; 1704 if (isReturnBlock(MBB) || MBB->succ_size() == 0) 1705 ++numReturnBlocks; 1706 } 1707 for (CSRegBlockMap::iterator BI = CSRSave.begin(), 1708 BE = CSRSave.end(); BI != BE; ++BI) { 1709 MachineBasicBlock* MBB = BI->first; 1710 CSRegSet spilled = BI->second; 1711 CSRegSet restored; 1712 1713 if (spilled.empty()) 1714 continue; 1715 1716 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " 1717 << stringifyCSRegSet(spilled) 1718 << " RESTORE[" << getBasicBlockName(MBB) << "] = " 1719 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; 1720 1721 if (CSRRestore[MBB].intersects(spilled)) { 1722 restored |= (CSRRestore[MBB] & spilled); 1723 } 1724 1725 // Walk depth first from MBB to find restores of all CSRs spilled at MBB: 1726 // we must find restores for all spills w/no intervening spills on all 1727 // paths from MBB to all return blocks. 1728 for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB), 1729 BE = df_end(MBB); BI != BE; ++BI) { 1730 MachineBasicBlock* SBB = *BI; 1731 if (SBB == MBB) 1732 continue; 1733 // Stop when we encounter spills of any CSRs spilled at MBB that 1734 // have not yet been seen to be restored. 1735 if (CSRSave[SBB].intersects(spilled) && 1736 !restored.contains(CSRSave[SBB] & spilled)) 1737 break; 1738 // Collect the CSRs spilled at MBB that are restored 1739 // at this DF successor of MBB. 1740 if (CSRRestore[SBB].intersects(spilled)) 1741 restored |= (CSRRestore[SBB] & spilled); 1742 // If we are at a retun block, check that the restores 1743 // we have seen so far exhaust the spills at MBB, then 1744 // reset the restores. 1745 if (isReturnBlock(SBB) || SBB->succ_size() == 0) { 1746 if (restored != spilled) { 1747 CSRegSet notRestored = (spilled - restored); 1748 DOUT << MF->getFunction()->getName() << ": " 1749 << stringifyCSRegSet(notRestored) 1750 << " spilled at " << getBasicBlockName(MBB) 1751 << " are never restored on path to return " 1752 << getBasicBlockName(SBB) << "\n"; 1753 } 1754 restored.clear(); 1755 } 1756 } 1757 } 1758 1759 // Check restore placements. 1760 for (CSRegBlockMap::iterator BI = CSRRestore.begin(), 1761 BE = CSRRestore.end(); BI != BE; ++BI) { 1762 MachineBasicBlock* MBB = BI->first; 1763 CSRegSet restored = BI->second; 1764 CSRegSet spilled; 1765 1766 if (restored.empty()) 1767 continue; 1768 1769 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " 1770 << stringifyCSRegSet(CSRSave[MBB]) 1771 << " RESTORE[" << getBasicBlockName(MBB) << "] = " 1772 << stringifyCSRegSet(restored) << "\n"; 1773 1774 if (CSRSave[MBB].intersects(restored)) { 1775 spilled |= (CSRSave[MBB] & restored); 1776 } 1777 // Walk inverse depth first from MBB to find spills of all 1778 // CSRs restored at MBB: 1779 for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB), 1780 BE = idf_end(MBB); BI != BE; ++BI) { 1781 MachineBasicBlock* PBB = *BI; 1782 if (PBB == MBB) 1783 continue; 1784 // Stop when we encounter restores of any CSRs restored at MBB that 1785 // have not yet been seen to be spilled. 1786 if (CSRRestore[PBB].intersects(restored) && 1787 !spilled.contains(CSRRestore[PBB] & restored)) 1788 break; 1789 // Collect the CSRs restored at MBB that are spilled 1790 // at this DF predecessor of MBB. 1791 if (CSRSave[PBB].intersects(restored)) 1792 spilled |= (CSRSave[PBB] & restored); 1793 } 1794 if (spilled != restored) { 1795 CSRegSet notSpilled = (restored - spilled); 1796 DOUT << MF->getFunction()->getName() << ": " 1797 << stringifyCSRegSet(notSpilled) 1798 << " restored at " << getBasicBlockName(MBB) 1799 << " are never spilled\n"; 1800 } 1801 } 1802} 1803 1804// Debugging print methods. 1805std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { 1806 std::ostringstream name; 1807 if (MBB) { 1808 if (MBB->getBasicBlock()) 1809 name << MBB->getBasicBlock()->getName(); 1810 else 1811 name << "_MBB_" << MBB->getNumber(); 1812 } 1813 return name.str(); 1814} 1815 1816std::string PEI::stringifyCSRegSet(const CSRegSet& s) { 1817 const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); 1818 const std::vector<CalleeSavedInfo> CSI = 1819 MF->getFrameInfo()->getCalleeSavedInfo(); 1820 1821 std::ostringstream srep; 1822 if (CSI.size() == 0) { 1823 srep << "[]"; 1824 return srep.str(); 1825 } 1826 srep << "["; 1827 CSRegSet::iterator I = s.begin(), E = s.end(); 1828 if (I != E) { 1829 unsigned reg = CSI[*I].getReg(); 1830 srep << TRI->getName(reg); 1831 for (++I; I != E; ++I) { 1832 reg = CSI[*I].getReg(); 1833 srep << ","; 1834 srep << TRI->getName(reg); 1835 } 1836 } 1837 srep << "]"; 1838 return srep.str(); 1839} 1840 1841void PEI::dumpSet(const CSRegSet& s) { 1842 DOUT << stringifyCSRegSet(s) << "\n"; 1843} 1844 1845void PEI::dumpUsed(MachineBasicBlock* MBB) { 1846 if (MBB) { 1847 DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = " 1848 << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; 1849 } 1850} 1851 1852void PEI::dumpAllUsed() { 1853 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 1854 MBBI != MBBE; ++MBBI) { 1855 MachineBasicBlock* MBB = MBBI; 1856 dumpUsed(MBB); 1857 } 1858} 1859 1860void PEI::dumpSets(MachineBasicBlock* MBB) { 1861 if (MBB) { 1862 DOUT << getBasicBlockName(MBB) << " | " 1863 << stringifyCSRegSet(CSRUsed[MBB]) << " | " 1864 << stringifyCSRegSet(AnticIn[MBB]) << " | " 1865 << stringifyCSRegSet(AnticOut[MBB]) << " | " 1866 << stringifyCSRegSet(AvailIn[MBB]) << " | " 1867 << stringifyCSRegSet(AvailOut[MBB]) << "\n"; 1868 } 1869} 1870 1871void PEI::dumpSets1(MachineBasicBlock* MBB) { 1872 if (MBB) { 1873 DOUT << getBasicBlockName(MBB) << " | " 1874 << stringifyCSRegSet(CSRUsed[MBB]) << " | " 1875 << stringifyCSRegSet(AnticIn[MBB]) << " | " 1876 << stringifyCSRegSet(AnticOut[MBB]) << " | " 1877 << stringifyCSRegSet(AvailIn[MBB]) << " | " 1878 << stringifyCSRegSet(AvailOut[MBB]) << " | " 1879 << stringifyCSRegSet(CSRSave[MBB]) << " | " 1880 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; 1881 } 1882} 1883 1884void PEI::dumpAllSets() { 1885 for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); 1886 MBBI != MBBE; ++MBBI) { 1887 MachineBasicBlock* MBB = MBBI; 1888 dumpSets1(MBB); 1889 } 1890} 1891 1892void PEI::dumpSRSets() { 1893 for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); 1894 MBB != E; ++MBB) { 1895 if (! CSRSave[MBB].empty()) { 1896 DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " 1897 << stringifyCSRegSet(CSRSave[MBB]); 1898 if (CSRRestore[MBB].empty()) 1899 DOUT << "\n"; 1900 } 1901 if (! CSRRestore[MBB].empty()) { 1902 if (! CSRSave[MBB].empty()) 1903 DOUT << " "; 1904 DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " 1905 << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; 1906 } 1907 } 1908} 1909#endif 1910