PrologEpilogInserter.cpp revision e5497d83bb451d19e4651c0bc257ccbcbb87fb10
1//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass is responsible for finalizing the functions frame layout, saving
11// callee saved registers, and for emitting prolog & epilog code for the
12// function.
13//
14// This pass must be run after register allocation.  After this pass is
15// executed, it is illegal to construct MO_FrameIndex operands.
16//
17// This pass implements a shrink wrapping variant of prolog/epilog insertion:
18// - Places callee saved register (CSR) spills and restores in the CFG to
19//   tightly surround uses so that execution paths that do not use CSRs do not
20//   pay the spill/restore penalty.
21//
22// - Avoiding placment of spills/restores in loops: if a CSR is used inside a
23//   loop(nest), the spills are placed in the loop preheader, and restores are
24//   placed in the loop exit nodes (the successors of the loop _exiting_ nodes).
25//
26// - Covering paths without CSR uses: e.g. if a restore is placed in a join
27//   block, a matching spill is added to the end of all immediate predecessor
28//   blocks that are not reached by a spill. Similarly for saves placed in
29//   branch blocks.
30//
31// Shrink wrapping uses an analysis similar to the one in GVNPRE to determine
32// which basic blocks require callee-saved register save/restore code.
33//
34// This pass uses MachineDominators and MachineLoopInfo. Loop information
35// is used to prevent shrink wrapping of callee-saved register save/restore
36// code into loops.
37//
38//===----------------------------------------------------------------------===//
39
40#define DEBUG_TYPE "shrink-wrap"
41
42#include "llvm/CodeGen/Passes.h"
43#include "llvm/CodeGen/MachineDominators.h"
44#include "llvm/CodeGen/MachineLoopInfo.h"
45#include "llvm/CodeGen/MachineFunctionPass.h"
46#include "llvm/CodeGen/MachineInstr.h"
47#include "llvm/CodeGen/MachineFrameInfo.h"
48#include "llvm/CodeGen/MachineModuleInfo.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/RegisterScavenging.h"
51#include "llvm/Target/TargetMachine.h"
52#include "llvm/Target/TargetRegisterInfo.h"
53#include "llvm/Target/TargetFrameInfo.h"
54#include "llvm/Target/TargetInstrInfo.h"
55#include "llvm/ADT/SparseBitVector.h"
56#include "llvm/ADT/DenseMap.h"
57#include "llvm/ADT/PostOrderIterator.h"
58#include "llvm/ADT/Statistic.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Compiler.h"
61#include "llvm/Support/Debug.h"
62#include "llvm/ADT/STLExtras.h"
63#include "llvm/ADT/Statistic.h"
64#include <climits>
65#include <sstream>
66
67using namespace llvm;
68
69STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
70
71// Shrink Wrapping:
72static cl::opt<bool>
73ShrinkWrapping("shrink-wrap",
74               cl::desc("Shrink wrap callee-saved register spills/restores"));
75
76// Shrink wrap only the specified function, a debugging aid.
77static cl::opt<std::string>
78ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
79               cl::desc("Shrink wrap the specified function"),
80               cl::value_desc("funcname"),
81               cl::init(""));
82
83// Debugging level for shrink wrapping.
84enum ShrinkWrapDebugLevel {
85  None, BasicInfo, Iterations, Details
86};
87
88static cl::opt<enum ShrinkWrapDebugLevel>
89ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
90  cl::desc("Print shrink wrapping debugging information"),
91  cl::values(
92    clEnumVal(None      , "disable debug output"),
93    clEnumVal(BasicInfo , "print basic DF sets"),
94    clEnumVal(Iterations, "print SR sets for each iteration"),
95    clEnumVal(Details   , "print all DF sets"),
96    clEnumValEnd));
97
98
99namespace {
100  struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass {
101    static char ID;
102    PEI() : MachineFunctionPass(&ID) {}
103
104    const char *getPassName() const {
105      return "Prolog/Epilog Insertion & Frame Finalization";
106    }
107
108    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
109      AU.setPreservesCFG();
110      if (ShrinkWrapping || ShrinkWrapFunc != "") {
111        AU.addRequired<MachineLoopInfo>();
112        AU.addRequired<MachineDominatorTree>();
113      }
114      AU.addPreserved<MachineLoopInfo>();
115      AU.addPreserved<MachineDominatorTree>();
116      MachineFunctionPass::getAnalysisUsage(AU);
117    }
118
119    /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
120    /// frame indexes with appropriate references.
121    ///
122    bool runOnMachineFunction(MachineFunction &Fn) {
123      const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
124      RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
125
126      DEBUG(MF = &Fn);
127
128      // Get MachineModuleInfo so that we can track the construction of the
129      // frame.
130      if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>())
131        Fn.getFrameInfo()->setMachineModuleInfo(MMI);
132
133      // Allow the target machine to make some adjustments to the function
134      // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
135      TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
136
137      // Scan the function for modified callee saved registers and insert spill
138      // code for any callee saved registers that are modified.  Also calculate
139      // the MaxCallFrameSize and HasCalls variables for the function's frame
140      // information and eliminates call frame pseudo instructions.
141      calculateCalleeSavedRegisters(Fn);
142
143      // Determine placement of CSR spill/restore code:
144      //  - with shrink wrapping, place spills and restores to tightly
145      //    enclose regions in the Machine CFG of the function where
146      //    they are used. Without shrink wrapping
147      //  - default (no shrink wrapping), place all spills in the
148      //    entry block, all restores in return blocks.
149      placeCSRSpillsAndRestores(Fn);
150
151      // Add the code to save and restore the callee saved registers
152      insertCSRSpillsAndRestores(Fn);
153
154      // Allow the target machine to make final modifications to the function
155      // before the frame layout is finalized.
156      TRI->processFunctionBeforeFrameFinalized(Fn);
157
158      // Calculate actual frame offsets for all abstract stack objects...
159      calculateFrameObjectOffsets(Fn);
160
161      // Add prolog and epilog code to the function.  This function is required
162      // to align the stack frame as necessary for any stack variables or
163      // called functions.  Because of this, calculateCalleeSavedRegisters
164      // must be called before this function in order to set the HasCalls
165      // and MaxCallFrameSize variables.
166      insertPrologEpilogCode(Fn);
167
168      // Replace all MO_FrameIndex operands with physical register references
169      // and actual offsets.
170      //
171      replaceFrameIndices(Fn);
172
173      delete RS;
174      clearAllSets();
175      return true;
176    }
177
178  private:
179    RegScavenger *RS;
180
181    // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
182    // stack frame indexes.
183    unsigned MinCSFrameIndex, MaxCSFrameIndex;
184
185    // Analysis info for spill/restore placement.
186    // "CSR": "callee saved register".
187
188    // CSRegSet contains indices into the Callee Saved Register Info
189    // vector built by calculateCalleeSavedRegisters() and accessed
190    // via MF.getFrameInfo()->getCalleeSavedInfo().
191    typedef SparseBitVector<> CSRegSet;
192
193    // CSRegBlockMap maps MachineBasicBlocks to sets of callee
194    // saved register indices.
195    typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
196
197    // Set and maps for computing CSR spill/restore placement:
198    //  used in function (UsedCSRegs)
199    //  used in a basic block (CSRUsed)
200    //  anticipatable in a basic block (Antic{In,Out})
201    //  available in a basic block (Avail{In,Out})
202    //  to be spilled at the entry to a basic block (CSRSave)
203    //  to be restored at the end of a basic block (CSRRestore)
204    CSRegSet UsedCSRegs;
205    CSRegBlockMap CSRUsed;
206    CSRegBlockMap AnticIn, AnticOut;
207    CSRegBlockMap AvailIn, AvailOut;
208    CSRegBlockMap CSRSave;
209    CSRegBlockMap CSRRestore;
210
211    // Entry and return blocks of the current function.
212    MachineBasicBlock* EntryBlock;
213    SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
214
215    // Map of MBBs to top level MachineLoops.
216    DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
217
218    // Flag to control shrink wrapping per-function:
219    // may choose to skip shrink wrapping for certain
220    // functions.
221    bool ShrinkWrapThisFunction;
222
223#ifndef NDEBUG
224    // Machine function handle.
225    MachineFunction* MF;
226
227    // Flag indicating that the current function
228    // has at least one "short" path in the machine
229    // CFG from the entry block to an exit block.
230    bool HasFastExitPath;
231#endif
232
233    bool calculateSets(MachineFunction &Fn);
234    bool calcAnticInOut(MachineBasicBlock* MBB);
235    bool calcAvailInOut(MachineBasicBlock* MBB);
236    void calculateAnticAvail(MachineFunction &Fn);
237    bool addUsesForMEMERegion(MachineBasicBlock* MBB,
238                              SmallVector<MachineBasicBlock*, 4>& blks);
239    bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
240    bool calcSpillPlacements(MachineBasicBlock* MBB,
241                             SmallVector<MachineBasicBlock*, 4> &blks,
242                             CSRegBlockMap &prevSpills);
243    bool calcRestorePlacements(MachineBasicBlock* MBB,
244                               SmallVector<MachineBasicBlock*, 4> &blks,
245                               CSRegBlockMap &prevRestores);
246    void placeSpillsAndRestores(MachineFunction &Fn);
247    void placeCSRSpillsAndRestores(MachineFunction &Fn);
248    void calculateCalleeSavedRegisters(MachineFunction &Fn);
249    void insertCSRSpillsAndRestores(MachineFunction &Fn);
250    void calculateFrameObjectOffsets(MachineFunction &Fn);
251    void replaceFrameIndices(MachineFunction &Fn);
252    void insertPrologEpilogCode(MachineFunction &Fn);
253
254    // Initialize DFA sets, called before iterations.
255    void clearAnticAvailSets();
256    // Clear all sets constructed by shrink wrapping.
257    void clearAllSets();
258
259    // Initialize all shrink wrapping data.
260    void initShrinkWrappingInfo();
261
262    // Convienences for dealing with machine loops.
263    MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP) {
264      assert(LP && "Machine loop is NULL.");
265      MachineBasicBlock* PHDR = LP->getLoopPreheader();
266      MachineLoop* PLP = LP->getParentLoop();
267      while (PLP) {
268        PHDR = PLP->getLoopPreheader();
269        PLP = PLP->getParentLoop();
270      }
271      return PHDR;
272    }
273
274    MachineLoop* getTopLevelLoopParent(MachineLoop *LP) {
275      if (LP == 0)
276        return 0;
277      MachineLoop* PLP = LP->getParentLoop();
278      while (PLP) {
279        LP = PLP;
280        PLP = PLP->getParentLoop();
281      }
282      return LP;
283    }
284
285    // Propgate CSRs used in MBB to all MBBs of loop LP.
286    void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
287
288    // Convenience for recognizing return blocks.
289    bool isReturnBlock(MachineBasicBlock* MBB) {
290      return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
291    }
292
293#ifndef NDEBUG
294    // Debugging methods.
295
296    // Mark this function as having fast exit paths.
297    void findFastExitPath();
298
299    // Verify placement of spills/restores.
300    void verifySpillRestorePlacement();
301
302    std::string getBasicBlockName(const MachineBasicBlock* MBB);
303    std::string stringifyCSRegSet(const CSRegSet& s);
304    void dumpSet(const CSRegSet& s);
305    void dumpUsed(MachineBasicBlock* MBB);
306    void dumpAllUsed();
307    void dumpSets(MachineBasicBlock* MBB);
308    void dumpSets1(MachineBasicBlock* MBB);
309    void dumpAllSets();
310    void dumpSRSets();
311#endif
312
313  };
314  char PEI::ID = 0;
315}
316
317// Initialize shrink wrapping DFA sets, called before iterations.
318void PEI::clearAnticAvailSets() {
319  AnticIn.clear();
320  AnticOut.clear();
321  AvailIn.clear();
322  AvailOut.clear();
323}
324
325// Clear all sets constructed by shrink wrapping.
326void PEI::clearAllSets() {
327  ReturnBlocks.clear();
328  clearAnticAvailSets();
329  UsedCSRegs.clear();
330  CSRUsed.clear();
331  TLLoops.clear();
332  CSRSave.clear();
333  CSRRestore.clear();
334}
335
336// Initialize all shrink wrapping data.
337void PEI::initShrinkWrappingInfo() {
338  clearAllSets();
339  EntryBlock = 0;
340  HasFastExitPath = false;
341  ShrinkWrapThisFunction = ShrinkWrapping;
342  // DEBUG: enable or disable shrink wrapping for the current function
343  // via --shrink-wrap-func=<funcname>.
344#ifndef NDEBUG
345  if (ShrinkWrapFunc != "") {
346    std::string MFName = MF->getFunction()->getName();
347    ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
348  }
349#endif
350}
351
352
353/// createPrologEpilogCodeInserter - This function returns a pass that inserts
354/// prolog and epilog code, and eliminates abstract frame references.
355///
356FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
357
358/// placeCSRSpillsAndRestores - determine which MBBs of the function
359/// need save, restore code for callee-saved registers by doing a DF analysis
360/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
361/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
362/// is used to ensure that CSR save/restore code is not placed inside loops.
363/// This function computes the maps of MBBs -> CSRs to spill and restore
364/// in CSRSave, CSRRestore.
365///
366/// If shrink wrapping is not being performed, place all spills in
367/// the entry block, all restores in return blocks. In this case,
368/// CSRSave has a single mapping, CSRRestore has mappings for each
369/// return block.
370///
371void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
372
373  initShrinkWrappingInfo();
374
375  DEBUG(if (ShrinkWrapThisFunction) {
376      DOUT << "Place CSR spills/restores for "
377           << MF->getFunction()->getName() << "\n";
378    });
379
380  if (calculateSets(Fn))
381    placeSpillsAndRestores(Fn);
382}
383
384/// calcAnticInOut - calculate the anticipated in/out reg sets
385/// for the given MBB by looking forward in the MCFG at MBB's
386/// successors.
387///
388bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
389  bool changed = false;
390
391  // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
392  SmallVector<MachineBasicBlock*, 4> successors;
393  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
394         SE = MBB->succ_end(); SI != SE; ++SI) {
395    MachineBasicBlock* SUCC = *SI;
396    if (SUCC != MBB)
397      successors.push_back(SUCC);
398  }
399
400  unsigned i = 0, e = successors.size();
401  if (i != e) {
402    CSRegSet prevAnticOut = AnticOut[MBB];
403    MachineBasicBlock* SUCC = successors[i];
404
405    AnticOut[MBB] = AnticIn[SUCC];
406    for (++i; i != e; ++i) {
407      SUCC = successors[i];
408      AnticOut[MBB] &= AnticIn[SUCC];
409    }
410    if (prevAnticOut != AnticOut[MBB])
411      changed = true;
412  }
413
414  // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
415  CSRegSet prevAnticIn = AnticIn[MBB];
416  AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
417  if (prevAnticIn |= AnticIn[MBB])
418    changed = true;
419  return changed;
420}
421
422/// calcAvailInOut - calculate the available in/out reg sets
423/// for the given MBB by looking backward in the MCFG at MBB's
424/// predecessors.
425///
426bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
427  bool changed = false;
428
429  // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
430  SmallVector<MachineBasicBlock*, 4> predecessors;
431  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
432         PE = MBB->pred_end(); PI != PE; ++PI) {
433    MachineBasicBlock* PRED = *PI;
434    if (PRED != MBB)
435      predecessors.push_back(PRED);
436  }
437
438  unsigned i = 0, e = predecessors.size();
439  if (i != e) {
440    CSRegSet prevAvailIn = AvailIn[MBB];
441    MachineBasicBlock* PRED = predecessors[i];
442
443    AvailIn[MBB] = AvailOut[PRED];
444    for (++i; i != e; ++i) {
445      PRED = predecessors[i];
446      AvailIn[MBB] &= AvailOut[PRED];
447    }
448    if (prevAvailIn != AvailIn[MBB])
449      changed = true;
450  }
451
452  // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
453  CSRegSet prevAvailOut = AvailOut[MBB];
454  AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
455  if (prevAvailOut |= AvailOut[MBB])
456    changed = true;
457  return changed;
458}
459
460/// calculateAnticAvail - build the sets anticipated and available
461/// registers in the MCFG of the current function iteratively,
462/// doing a combined forward and backward analysis.
463///
464void PEI::calculateAnticAvail(MachineFunction &Fn) {
465  // Initialize data flow sets.
466  clearAnticAvailSets();
467
468  // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
469  bool changed = true;
470  unsigned iterations = 0;
471  while (changed) {
472    changed = false;
473    ++iterations;
474    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
475         MBBI != MBBE; ++MBBI) {
476      MachineBasicBlock* MBB = MBBI;
477
478      // Calculate anticipated in, out regs at MBB from
479      // anticipated at successors of MBB.
480      changed |= calcAnticInOut(MBB);
481
482      // Calculate available in, out regs at MBB from
483      // available at predecessors of MBB.
484      changed |= calcAvailInOut(MBB);
485    }
486  }
487
488  DEBUG(if (ShrinkWrapDebugging >= Details) {
489      DOUT << "-----------------------------------------------------------\n";
490      DOUT << " Antic/Avail Sets:\n";
491      DOUT << "-----------------------------------------------------------\n";
492      DOUT << "iterations = " << iterations << "\n";
493      DOUT << "-----------------------------------------------------------\n";
494      DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n";
495      DOUT << "-----------------------------------------------------------\n";
496      for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
497           MBBI != MBBE; ++MBBI) {
498        MachineBasicBlock* MBB = MBBI;
499        dumpSets(MBB);
500      }
501      DOUT << "-----------------------------------------------------------\n";
502    });
503}
504
505/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
506/// of the loop given by LP and its parent loops. This prevents spills/restores
507/// from being placed in the bodies of loops.
508///
509void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
510  if (! MBB || !LP)
511    return;
512
513  std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
514  for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
515    MachineBasicBlock* LBB = loopBlocks[i];
516    if (LBB == MBB)
517      continue;
518    if (CSRUsed[LBB].contains(CSRUsed[MBB]))
519      continue;
520    CSRUsed[LBB] |= CSRUsed[MBB];
521  }
522}
523
524/// calculateSets - collect the CSRs used in this function, compute
525/// the DF sets that describe the initial minimal regions in the
526/// Machine CFG around which CSR spills and restores must be placed.
527///
528/// Additionally, this function decides if shrink wrapping should
529/// be disabled for the current function, checking the following:
530///  1. the current function has more than 500 MBBs: heuristic limit
531///     on function size to reduce compile time impact of the current
532///     iterative algorithm.
533///  2. all CSRs are used in the entry block.
534///  3. all CSRs are used in all immediate successors of the entry block.
535///  4. all CSRs are used in a subset of blocks, each of which dominates
536///     all return blocks. These blocks, taken as a subgraph of the MCFG,
537///     are equivalent to the entry block since all execution paths pass
538///     through them.
539///
540bool PEI::calculateSets(MachineFunction &Fn) {
541  // Sets used to compute spill, restore placement sets.
542  const std::vector<CalleeSavedInfo> CSI =
543    Fn.getFrameInfo()->getCalleeSavedInfo();
544
545  // If no CSRs used, we are done.
546  if (CSI.empty()) {
547    DEBUG(if (ShrinkWrapThisFunction)
548            DOUT << "DISABLED: " << Fn.getFunction()->getName()
549                 << ": uses no callee-saved registers\n");
550    return false;
551  }
552
553  // Save refs to entry and return blocks.
554  EntryBlock = Fn.begin();
555  for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
556       MBB != E; ++MBB)
557    if (isReturnBlock(MBB))
558      ReturnBlocks.push_back(MBB);
559
560  // Determine if this function has fast exit paths.
561  DEBUG(if (ShrinkWrapThisFunction)
562          findFastExitPath());
563
564  // Limit shrink wrapping via the current iterative bit vector
565  // implementation to functions with <= 500 MBBs.
566  if (Fn.size() > 500) {
567    DEBUG(if (ShrinkWrapThisFunction)
568            DOUT << "DISABLED: " << Fn.getFunction()->getName()
569                 << ": too large (" << Fn.size() << " MBBs)\n");
570    ShrinkWrapThisFunction = false;
571  }
572
573  // Return now if not shrink wrapping.
574  if (! ShrinkWrapThisFunction)
575    return false;
576
577  // Collect set of used CSRs.
578  for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
579    UsedCSRegs.set(inx);
580  }
581
582  // Walk instructions in all MBBs, create CSRUsed[] sets, choose
583  // whether or not to shrink wrap this function.
584  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
585  MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
586  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
587
588  bool allCSRUsesInEntryBlock = true;
589  for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
590       MBBI != MBBE; ++MBBI) {
591    MachineBasicBlock* MBB = MBBI;
592    for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
593      for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
594        unsigned Reg = CSI[inx].getReg();
595        // If instruction I reads or modifies Reg, add it to UsedCSRegs,
596        // CSRUsed map for the current block.
597        for (unsigned opInx = 0, opEnd = I->getNumOperands();
598             opInx != opEnd; ++opInx) {
599          const MachineOperand &MO = I->getOperand(opInx);
600          if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
601            continue;
602          unsigned MOReg = MO.getReg();
603          if (!MOReg)
604            continue;
605          if (MOReg == Reg ||
606              (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
607               TargetRegisterInfo::isPhysicalRegister(Reg) &&
608               TRI->isSubRegister(Reg, MOReg))) {
609            // CSR Reg is defined/used in block MBB.
610            CSRUsed[MBB].set(inx);
611            // Check for uses in EntryBlock.
612            if (MBB != EntryBlock)
613              allCSRUsesInEntryBlock = false;
614          }
615        }
616      }
617    }
618
619    if (CSRUsed[MBB].empty())
620      continue;
621
622    // Propagate CSRUsed[MBB] in loops
623    if (MachineLoop* LP = LI.getLoopFor(MBB)) {
624      // Add top level loop to work list.
625      MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
626      MachineLoop* PLP = getTopLevelLoopParent(LP);
627
628      if (! HDR) {
629        HDR = PLP->getHeader();
630        assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
631        MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
632        HDR = *PI;
633      }
634      TLLoops[HDR] = PLP;
635
636      // Push uses from inside loop to its parent loops,
637      // or to all other MBBs in its loop.
638      if (LP->getLoopDepth() > 1) {
639        for (MachineLoop* PLP = LP->getParentLoop(); PLP;
640             PLP = PLP->getParentLoop()) {
641          propagateUsesAroundLoop(MBB, PLP);
642        }
643      } else {
644        propagateUsesAroundLoop(MBB, LP);
645      }
646    }
647  }
648
649  if (allCSRUsesInEntryBlock) {
650    DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
651          << ": all CSRs used in EntryBlock\n");
652    ShrinkWrapThisFunction = false;
653  } else {
654    bool allCSRsUsedInEntryFanout = true;
655    for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
656           SE = EntryBlock->succ_end(); SI != SE; ++SI) {
657      MachineBasicBlock* SUCC = *SI;
658      if (CSRUsed[SUCC] != UsedCSRegs)
659        allCSRsUsedInEntryFanout = false;
660    }
661    if (allCSRsUsedInEntryFanout) {
662      DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
663            << ": all CSRs used in imm successors of EntryBlock\n");
664      ShrinkWrapThisFunction = false;
665    }
666  }
667
668  if (ShrinkWrapThisFunction) {
669    // Check if MBB uses CSRs and dominates all exit nodes.
670    // Such nodes are equiv. to the entry node w.r.t.
671    // CSR uses: every path through the function must
672    // pass through this node. If each CSR is used at least
673    // once by these nodes, shrink wrapping is disabled.
674    CSRegSet CSRUsedInChokePoints;
675    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
676         MBBI != MBBE; ++MBBI) {
677      MachineBasicBlock* MBB = MBBI;
678      if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
679        continue;
680      bool dominatesExitNodes = true;
681      for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
682        if (! DT.dominates(MBB, ReturnBlocks[ri])) {
683          dominatesExitNodes = false;
684          break;
685        }
686      if (dominatesExitNodes) {
687        CSRUsedInChokePoints |= CSRUsed[MBB];
688        if (CSRUsedInChokePoints == UsedCSRegs) {
689          DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
690                << ": all CSRs used in choke point(s) at "
691                << getBasicBlockName(MBB) << "\n");
692          ShrinkWrapThisFunction = false;
693          break;
694        }
695      }
696    }
697  }
698
699  // Return now if we have decided not to apply shrink wrapping
700  // to the current function.
701  if (! ShrinkWrapThisFunction)
702    return false;
703
704  DEBUG({
705      DOUT << "ENABLED: " << Fn.getFunction()->getName();
706      if (HasFastExitPath)
707        DOUT << " (fast exit path)";
708      DOUT << "\n";
709      if (ShrinkWrapDebugging >= BasicInfo) {
710        DOUT << "------------------------------"
711             << "-----------------------------\n";
712        DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
713        if (ShrinkWrapDebugging >= Details) {
714          DOUT << "------------------------------"
715               << "-----------------------------\n";
716          dumpAllUsed();
717        }
718      }
719    });
720
721  // Build initial DF sets to determine minimal regions in the
722  // Machine CFG around which CSRs must be spilled and restored.
723  calculateAnticAvail(Fn);
724
725  return true;
726}
727
728/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
729/// multi-entry, multi-exit (MEME) regions so spill and restore
730/// placement will not break code that enters or leaves a
731/// shrink-wrapped region by inducing spills with no matching
732/// restores or restores with no matching spills. A MEME region
733/// is a subgraph of the MCFG with multiple entry edges, multiple
734/// exit edges, or both. This code propagates use information
735/// through the MCFG until all paths requiring spills and restores
736/// _outside_ the computed minimal placement regions have been covered.
737///
738bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
739                               SmallVector<MachineBasicBlock*, 4>& blks) {
740  if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
741    bool processThisBlock = false;
742    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
743           SE = MBB->succ_end(); SI != SE; ++SI) {
744      MachineBasicBlock* SUCC = *SI;
745      if (SUCC->pred_size() > 1) {
746        processThisBlock = true;
747        break;
748      }
749    }
750    if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
751      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
752             PE = MBB->pred_end(); PI != PE; ++PI) {
753        MachineBasicBlock* PRED = *PI;
754        if (PRED->succ_size() > 1) {
755          processThisBlock = true;
756          break;
757        }
758      }
759    }
760    if (! processThisBlock)
761      return false;
762  }
763
764  CSRegSet prop;
765  if (!CSRSave[MBB].empty())
766    prop = CSRSave[MBB];
767  else if (!CSRRestore[MBB].empty())
768    prop = CSRRestore[MBB];
769  else
770    prop = CSRUsed[MBB];
771  if (prop.empty())
772    return false;
773
774  // Propagate selected bits to successors, predecessors of MBB.
775  bool addedUses = false;
776  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
777         SE = MBB->succ_end(); SI != SE; ++SI) {
778    MachineBasicBlock* SUCC = *SI;
779    // Self-loop
780    if (SUCC == MBB)
781      continue;
782    if (! CSRUsed[SUCC].contains(prop)) {
783      CSRUsed[SUCC] |= prop;
784      addedUses = true;
785      blks.push_back(SUCC);
786      DEBUG(if (ShrinkWrapDebugging >= Iterations)
787              DOUT << getBasicBlockName(MBB)
788                   << "(" << stringifyCSRegSet(prop) << ")->"
789                   << "successor " << getBasicBlockName(SUCC) << "\n");
790    }
791  }
792  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
793         PE = MBB->pred_end(); PI != PE; ++PI) {
794    MachineBasicBlock* PRED = *PI;
795    // Self-loop
796    if (PRED == MBB)
797      continue;
798    if (! CSRUsed[PRED].contains(prop)) {
799      CSRUsed[PRED] |= prop;
800      addedUses = true;
801      blks.push_back(PRED);
802      DEBUG(if (ShrinkWrapDebugging >= Iterations)
803              DOUT << getBasicBlockName(MBB)
804                   << "(" << stringifyCSRegSet(prop) << ")->"
805                   << "predecessor " << getBasicBlockName(PRED) << "\n");
806    }
807  }
808  return addedUses;
809}
810
811/// addUsesForTopLevelLoops - add uses for CSRs used inside top
812/// level loops to the exit blocks of those loops.
813///
814bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
815  bool addedUses = false;
816
817  // Place restores for top level loops where needed.
818  for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
819         I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
820    MachineBasicBlock* MBB = I->first;
821    MachineLoop* LP = I->second;
822    MachineBasicBlock* HDR = LP->getHeader();
823    SmallVector<MachineBasicBlock*, 4> exitBlocks;
824    CSRegSet loopSpills;
825
826    loopSpills = CSRSave[MBB];
827    if (CSRSave[MBB].empty()) {
828      loopSpills = CSRUsed[HDR];
829      assert(!loopSpills.empty() && "No CSRs used in loop?");
830    } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
831      continue;
832
833    LP->getExitBlocks(exitBlocks);
834    assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
835    for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
836      MachineBasicBlock* EXB = exitBlocks[i];
837      if (! CSRUsed[EXB].contains(loopSpills)) {
838        CSRUsed[EXB] |= loopSpills;
839        addedUses = true;
840        DEBUG(if (ShrinkWrapDebugging >= Iterations)
841                DOUT << "LOOP " << getBasicBlockName(MBB)
842                     << "(" << stringifyCSRegSet(loopSpills) << ")->"
843                     << getBasicBlockName(EXB) << "\n");
844        if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
845          blks.push_back(EXB);
846      }
847    }
848  }
849  return addedUses;
850}
851
852/// calcSpillPlacements - determine which CSRs should be spilled
853/// in MBB using AnticIn sets of MBB's predecessors, keeping track
854/// of changes to spilled reg sets. Add MBB to the set of blocks
855/// that need to be processed for propagating use info to cover
856/// multi-entry/exit regions.
857///
858bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
859                              SmallVector<MachineBasicBlock*, 4> &blks,
860                              CSRegBlockMap &prevSpills) {
861  bool placedSpills = false;
862  // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
863  CSRegSet anticInPreds;
864  SmallVector<MachineBasicBlock*, 4> predecessors;
865  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
866         PE = MBB->pred_end(); PI != PE; ++PI) {
867    MachineBasicBlock* PRED = *PI;
868    if (PRED != MBB)
869      predecessors.push_back(PRED);
870  }
871  unsigned i = 0, e = predecessors.size();
872  if (i != e) {
873    MachineBasicBlock* PRED = predecessors[i];
874    anticInPreds = UsedCSRegs - AnticIn[PRED];
875    for (++i; i != e; ++i) {
876      PRED = predecessors[i];
877      anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
878    }
879  } else {
880    // Handle uses in entry blocks (which have no predecessors).
881    // This is necessary because the DFA formulation assumes the
882    // entry and (multiple) exit nodes cannot have CSR uses, which
883    // is not the case in the real world.
884    anticInPreds = UsedCSRegs;
885  }
886  // Compute spills required at MBB:
887  CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
888
889  if (! CSRSave[MBB].empty()) {
890    if (MBB == EntryBlock) {
891      for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
892        CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
893    } else {
894      // Reset all regs spilled in MBB that are also spilled in EntryBlock.
895      if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
896        CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
897      }
898    }
899  }
900  placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
901  prevSpills[MBB] = CSRSave[MBB];
902  // Remember this block for adding restores to successor
903  // blocks for multi-entry region.
904  if (placedSpills)
905    blks.push_back(MBB);
906
907  DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
908          DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
909               << stringifyCSRegSet(CSRSave[MBB]) << "\n");
910
911  return placedSpills;
912}
913
914/// calcRestorePlacements - determine which CSRs should be restored
915/// in MBB using AvailOut sets of MBB's succcessors, keeping track
916/// of changes to restored reg sets. Add MBB to the set of blocks
917/// that need to be processed for propagating use info to cover
918/// multi-entry/exit regions.
919///
920bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
921                                SmallVector<MachineBasicBlock*, 4> &blks,
922                                CSRegBlockMap &prevRestores) {
923  bool placedRestores = false;
924  // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
925  CSRegSet availOutSucc;
926  SmallVector<MachineBasicBlock*, 4> successors;
927  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
928         SE = MBB->succ_end(); SI != SE; ++SI) {
929    MachineBasicBlock* SUCC = *SI;
930    if (SUCC != MBB)
931      successors.push_back(SUCC);
932  }
933  unsigned i = 0, e = successors.size();
934  if (i != e) {
935    MachineBasicBlock* SUCC = successors[i];
936    availOutSucc = UsedCSRegs - AvailOut[SUCC];
937    for (++i; i != e; ++i) {
938      SUCC = successors[i];
939      availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
940    }
941  } else {
942    if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
943      // Handle uses in return blocks (which have no successors).
944      // This is necessary because the DFA formulation assumes the
945      // entry and (multiple) exit nodes cannot have CSR uses, which
946      // is not the case in the real world.
947      availOutSucc = UsedCSRegs;
948    }
949  }
950  // Compute restores required at MBB:
951  CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
952
953  // Postprocess restore placements at MBB.
954  // Remove the CSRs that are restored in the return blocks.
955  // Lest this be confusing, note that:
956  // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
957  if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
958    if (! CSRSave[EntryBlock].empty())
959      CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
960  }
961  placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
962  prevRestores[MBB] = CSRRestore[MBB];
963  // Remember this block for adding saves to predecessor
964  // blocks for multi-entry region.
965  if (placedRestores)
966    blks.push_back(MBB);
967
968  DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
969          DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
970               << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
971
972  return placedRestores;
973}
974
975/// placeSpillsAndRestores - place spills and restores of CSRs
976/// used in MBBs in minimal regions that contain the uses.
977///
978void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
979  CSRegBlockMap prevCSRSave;
980  CSRegBlockMap prevCSRRestore;
981  SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
982  bool changed = true;
983  unsigned iterations = 0;
984
985  // Iterate computation of spill and restore placements in the MCFG until:
986  //   1. CSR use info has been fully propagated around the MCFG, and
987  //   2. computation of CSRSave[], CSRRestore[] reach fixed points.
988  while (changed) {
989    changed = false;
990    ++iterations;
991
992    DEBUG(if (ShrinkWrapDebugging >= Iterations)
993            DOUT << "iter " << iterations
994                 << " --------------------------------------------------\n");
995
996    // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
997    // which determines the placements of spills and restores.
998    // Keep track of changes to spills, restores in each iteration to
999    // minimize the total iterations.
1000    bool SRChanged = false;
1001    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
1002         MBBI != MBBE; ++MBBI) {
1003      MachineBasicBlock* MBB = MBBI;
1004
1005      // Place spills for CSRs in MBB.
1006      SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
1007
1008      // Place restores for CSRs in MBB.
1009      SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
1010    }
1011
1012    // Add uses of CSRs used inside loops where needed.
1013    changed |= addUsesForTopLevelLoops(cvBlocks);
1014
1015    // Add uses for CSRs spilled or restored at branch, join points.
1016    if (changed || SRChanged) {
1017      while (! cvBlocks.empty()) {
1018        MachineBasicBlock* MBB = cvBlocks.pop_back_val();
1019        changed |= addUsesForMEMERegion(MBB, ncvBlocks);
1020      }
1021      if (! ncvBlocks.empty()) {
1022        cvBlocks = ncvBlocks;
1023        ncvBlocks.clear();
1024      }
1025    }
1026
1027    if (changed) {
1028      calculateAnticAvail(Fn);
1029      CSRSave.clear();
1030      CSRRestore.clear();
1031    }
1032  }
1033
1034  // Check for effectiveness:
1035  //  SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
1036  //  numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
1037  // Gives a measure of how many CSR spills have been moved from EntryBlock
1038  // to minimal regions enclosing their uses.
1039  CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
1040  unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
1041  numSRReduced += numSRReducedThisFunc;
1042  DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
1043      DOUT << "-----------------------------------------------------------\n";
1044      DOUT << "total iterations = " << iterations << " ( "
1045           << Fn.getFunction()->getName()
1046           << " " << numSRReducedThisFunc
1047           << " " << Fn.size()
1048           << " )\n";
1049      DOUT << "-----------------------------------------------------------\n";
1050      dumpSRSets();
1051      DOUT << "-----------------------------------------------------------\n";
1052      if (numSRReducedThisFunc)
1053        verifySpillRestorePlacement();
1054    });
1055}
1056
1057/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
1058/// registers.  Also calculate the MaxCallFrameSize and HasCalls variables for
1059/// the function's frame information and eliminates call frame pseudo
1060/// instructions.
1061///
1062void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
1063  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
1064  const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
1065
1066  // Get the callee saved register list...
1067  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
1068
1069  // Get the function call frame set-up and tear-down instruction opcode
1070  int FrameSetupOpcode   = RegInfo->getCallFrameSetupOpcode();
1071  int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
1072
1073  // These are used to keep track the callee-save area. Initialize them.
1074  MinCSFrameIndex = INT_MAX;
1075  MaxCSFrameIndex = 0;
1076
1077  // Early exit for targets which have no callee saved registers and no call
1078  // frame setup/destroy pseudo instructions.
1079  if ((CSRegs == 0 || CSRegs[0] == 0) &&
1080      FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
1081    return;
1082
1083  unsigned MaxCallFrameSize = 0;
1084  bool HasCalls = false;
1085
1086  std::vector<MachineBasicBlock::iterator> FrameSDOps;
1087  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
1088    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
1089      if (I->getOpcode() == FrameSetupOpcode ||
1090          I->getOpcode() == FrameDestroyOpcode) {
1091        assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
1092               " instructions should have a single immediate argument!");
1093        unsigned Size = I->getOperand(0).getImm();
1094        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
1095        HasCalls = true;
1096        FrameSDOps.push_back(I);
1097      }
1098
1099  MachineFrameInfo *FFI = Fn.getFrameInfo();
1100  FFI->setHasCalls(HasCalls);
1101  FFI->setMaxCallFrameSize(MaxCallFrameSize);
1102
1103  for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) {
1104    MachineBasicBlock::iterator I = FrameSDOps[i];
1105    // If call frames are not being included as part of the stack frame,
1106    // and there is no dynamic allocation (therefore referencing frame slots
1107    // off sp), leave the pseudo ops alone. We'll eliminate them later.
1108    if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
1109      RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
1110  }
1111
1112  // Now figure out which *callee saved* registers are modified by the current
1113  // function, thus needing to be saved and restored in the prolog/epilog.
1114  //
1115  const TargetRegisterClass* const *CSRegClasses =
1116    RegInfo->getCalleeSavedRegClasses(&Fn);
1117  std::vector<CalleeSavedInfo> CSI;
1118  for (unsigned i = 0; CSRegs[i]; ++i) {
1119    unsigned Reg = CSRegs[i];
1120    if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
1121        // If the reg is modified, save it!
1122      CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
1123    } else {
1124      for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
1125           *AliasSet; ++AliasSet) {  // Check alias registers too.
1126        if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
1127          CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
1128          break;
1129        }
1130      }
1131    }
1132  }
1133
1134  if (CSI.empty())
1135    return;   // Early exit if no callee saved registers are modified!
1136
1137  unsigned NumFixedSpillSlots;
1138  const std::pair<unsigned,int> *FixedSpillSlots =
1139    TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
1140
1141  // Now that we know which registers need to be saved and restored, allocate
1142  // stack slots for them.
1143  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1144    unsigned Reg = CSI[i].getReg();
1145    const TargetRegisterClass *RC = CSI[i].getRegClass();
1146
1147    // Check to see if this physreg must be spilled to a particular stack slot
1148    // on this target.
1149    const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
1150    while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
1151           FixedSlot->first != Reg)
1152      ++FixedSlot;
1153
1154    int FrameIdx;
1155    if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
1156      // Nope, just spill it anywhere convenient.
1157      unsigned Align = RC->getAlignment();
1158      unsigned StackAlign = TFI->getStackAlignment();
1159      // We may not be able to sastify the desired alignment specification of
1160      // the TargetRegisterClass if the stack alignment is smaller.
1161      // Use the min.
1162      Align = std::min(Align, StackAlign);
1163      FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
1164      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
1165      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
1166    } else {
1167      // Spill it to the stack where we must.
1168      FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
1169    }
1170    CSI[i].setFrameIdx(FrameIdx);
1171  }
1172
1173  FFI->setCalleeSavedInfo(CSI);
1174}
1175
1176/// insertCSRSpillsAndRestores - Insert spill and restore code for
1177/// callee saved registers used in the function, handling shrink wrapping.
1178///
1179void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
1180  // Get callee saved register information.
1181  MachineFrameInfo *FFI = Fn.getFrameInfo();
1182  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
1183
1184  // Early exit if no callee saved registers are modified!
1185  if (CSI.empty())
1186    return;
1187
1188  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
1189  MachineBasicBlock::iterator I;
1190
1191  DEBUG(if (ShrinkWrapThisFunction && ShrinkWrapDebugging >= Details)
1192          DOUT << "Inserting CSR spills/restores in function "
1193               << Fn.getFunction()->getName() << "\n");
1194
1195  if (! ShrinkWrapThisFunction) {
1196    // Spill using target interface.
1197    I = EntryBlock->begin();
1198    if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
1199      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1200        // Add the callee-saved register as live-in.
1201        // It's killed at the spill.
1202        EntryBlock->addLiveIn(CSI[i].getReg());
1203
1204        // Insert the spill to the stack frame.
1205        TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
1206                                CSI[i].getFrameIdx(), CSI[i].getRegClass());
1207      }
1208    }
1209
1210    // Restore using target interface.
1211    for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
1212      MachineBasicBlock* MBB = ReturnBlocks[ri];
1213      I = MBB->end(); --I;
1214
1215      // Skip over all terminator instructions, which are part of the return
1216      // sequence.
1217      MachineBasicBlock::iterator I2 = I;
1218      while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
1219        I = I2;
1220
1221      bool AtStart = I == MBB->begin();
1222      MachineBasicBlock::iterator BeforeI = I;
1223      if (!AtStart)
1224        --BeforeI;
1225
1226      // Restore all registers immediately before the return and any
1227      // terminators that preceed it.
1228      if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
1229        for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1230          TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
1231                                   CSI[i].getFrameIdx(),
1232                                   CSI[i].getRegClass());
1233          assert(I != MBB->begin() &&
1234                 "loadRegFromStackSlot didn't insert any code!");
1235          // Insert in reverse order.  loadRegFromStackSlot can insert
1236          // multiple instructions.
1237          if (AtStart)
1238            I = MBB->begin();
1239          else {
1240            I = BeforeI;
1241            ++I;
1242          }
1243        }
1244      }
1245    }
1246    return;
1247  }
1248
1249  // Insert spills.
1250  std::vector<CalleeSavedInfo> blockCSI;
1251  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
1252         BE = CSRSave.end(); BI != BE; ++BI) {
1253    MachineBasicBlock* MBB = BI->first;
1254    CSRegSet save = BI->second;
1255
1256    if (save.empty())
1257      continue;
1258
1259    DEBUG(if (ShrinkWrapDebugging >= Details)
1260            DOUT << "Spilling " << stringifyCSRegSet(save)
1261                 << " in " << getBasicBlockName(MBB) << "\n");
1262
1263    blockCSI.clear();
1264    for (CSRegSet::iterator RI = save.begin(),
1265           RE = save.end(); RI != RE; ++RI) {
1266      blockCSI.push_back(CSI[*RI]);
1267    }
1268    assert(blockCSI.size() > 0 &&
1269           "Could not collect callee saved register info");
1270
1271    I = MBB->begin();
1272
1273    // When shrink wrapping, use stack slot stores/loads.
1274    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
1275      // Add the callee-saved register as live-in.
1276      // It's killed at the spill.
1277      MBB->addLiveIn(blockCSI[i].getReg());
1278
1279      // Insert the spill to the stack frame.
1280      TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
1281                              true,
1282                              blockCSI[i].getFrameIdx(),
1283                              blockCSI[i].getRegClass());
1284    }
1285  }
1286
1287  DEBUG(if (ShrinkWrapDebugging >= Details)
1288          DOUT << "------------------------------"
1289               << "-----------------------------\n");
1290
1291  for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
1292         BE = CSRRestore.end(); BI != BE; ++BI) {
1293    MachineBasicBlock* MBB = BI->first;
1294    CSRegSet restore = BI->second;
1295
1296    if (restore.empty())
1297      continue;
1298
1299    DEBUG(if (ShrinkWrapDebugging >= Details)
1300            DOUT << "Restoring " << stringifyCSRegSet(restore)
1301                 << " in " << getBasicBlockName(MBB) << "\n");
1302
1303    blockCSI.clear();
1304    for (CSRegSet::iterator RI = restore.begin(),
1305           RE = restore.end(); RI != RE; ++RI) {
1306      blockCSI.push_back(CSI[*RI]);
1307    }
1308    assert(blockCSI.size() > 0 &&
1309           "Could not find callee saved register info");
1310
1311    // If MBB is empty and needs restores, insert at the _beginning_.
1312    if (MBB->empty()) {
1313      I = MBB->begin();
1314    } else {
1315      I = MBB->end();
1316      --I;
1317
1318      // Skip over all terminator instructions, which are part of the
1319      // return sequence.
1320      if (! I->getDesc().isTerminator()) {
1321        ++I;
1322      } else {
1323        MachineBasicBlock::iterator I2 = I;
1324        while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
1325          I = I2;
1326      }
1327    }
1328
1329    bool AtStart = I == MBB->begin();
1330    MachineBasicBlock::iterator BeforeI = I;
1331    if (!AtStart)
1332      --BeforeI;
1333
1334    // Restore all registers immediately before the return and any
1335    // terminators that preceed it.
1336    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
1337      TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
1338                               blockCSI[i].getFrameIdx(),
1339                               blockCSI[i].getRegClass());
1340      assert(I != MBB->begin() &&
1341             "loadRegFromStackSlot didn't insert any code!");
1342      // Insert in reverse order.  loadRegFromStackSlot can insert
1343      // multiple instructions.
1344      if (AtStart)
1345        I = MBB->begin();
1346      else {
1347        I = BeforeI;
1348        ++I;
1349      }
1350    }
1351  }
1352
1353  DEBUG(if (ShrinkWrapDebugging >= Details)
1354          DOUT << "------------------------------"
1355               << "-----------------------------\n");
1356}
1357
1358/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
1359static inline void
1360AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
1361                  bool StackGrowsDown, int64_t &Offset,
1362                  unsigned &MaxAlign) {
1363  // If stack grows down, we need to add size of find the lowest address of the
1364  // object.
1365  if (StackGrowsDown)
1366    Offset += FFI->getObjectSize(FrameIdx);
1367
1368  unsigned Align = FFI->getObjectAlignment(FrameIdx);
1369
1370  // If the alignment of this object is greater than that of the stack, then
1371  // increase the stack alignment to match.
1372  MaxAlign = std::max(MaxAlign, Align);
1373
1374  // Adjust to alignment boundary.
1375  Offset = (Offset + Align - 1) / Align * Align;
1376
1377  if (StackGrowsDown) {
1378    FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
1379  } else {
1380    FFI->setObjectOffset(FrameIdx, Offset);
1381    Offset += FFI->getObjectSize(FrameIdx);
1382  }
1383}
1384
1385/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
1386/// abstract stack objects.
1387///
1388void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
1389  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
1390
1391  bool StackGrowsDown =
1392    TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
1393
1394  // Loop over all of the stack objects, assigning sequential addresses...
1395  MachineFrameInfo *FFI = Fn.getFrameInfo();
1396
1397  unsigned MaxAlign = FFI->getMaxAlignment();
1398
1399  // Start at the beginning of the local area.
1400  // The Offset is the distance from the stack top in the direction
1401  // of stack growth -- so it's always nonnegative.
1402  int64_t Offset = TFI.getOffsetOfLocalArea();
1403  if (StackGrowsDown)
1404    Offset = -Offset;
1405  assert(Offset >= 0
1406         && "Local area offset should be in direction of stack growth");
1407
1408  // If there are fixed sized objects that are preallocated in the local area,
1409  // non-fixed objects can't be allocated right at the start of local area.
1410  // We currently don't support filling in holes in between fixed sized
1411  // objects, so we adjust 'Offset' to point to the end of last fixed sized
1412  // preallocated object.
1413  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
1414    int64_t FixedOff;
1415    if (StackGrowsDown) {
1416      // The maximum distance from the stack pointer is at lower address of
1417      // the object -- which is given by offset. For down growing stack
1418      // the offset is negative, so we negate the offset to get the distance.
1419      FixedOff = -FFI->getObjectOffset(i);
1420    } else {
1421      // The maximum distance from the start pointer is at the upper
1422      // address of the object.
1423      FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
1424    }
1425    if (FixedOff > Offset) Offset = FixedOff;
1426  }
1427
1428  // First assign frame offsets to stack objects that are used to spill
1429  // callee saved registers.
1430  if (StackGrowsDown) {
1431    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
1432      // If stack grows down, we need to add size of find the lowest
1433      // address of the object.
1434      Offset += FFI->getObjectSize(i);
1435
1436      unsigned Align = FFI->getObjectAlignment(i);
1437      // If the alignment of this object is greater than that of the stack,
1438      // then increase the stack alignment to match.
1439      MaxAlign = std::max(MaxAlign, Align);
1440      // Adjust to alignment boundary
1441      Offset = (Offset+Align-1)/Align*Align;
1442
1443      FFI->setObjectOffset(i, -Offset);        // Set the computed offset
1444    }
1445  } else {
1446    int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
1447    for (int i = MaxCSFI; i >= MinCSFI ; --i) {
1448      unsigned Align = FFI->getObjectAlignment(i);
1449      // If the alignment of this object is greater than that of the stack,
1450      // then increase the stack alignment to match.
1451      MaxAlign = std::max(MaxAlign, Align);
1452      // Adjust to alignment boundary
1453      Offset = (Offset+Align-1)/Align*Align;
1454
1455      FFI->setObjectOffset(i, Offset);
1456      Offset += FFI->getObjectSize(i);
1457    }
1458  }
1459
1460  // Make sure the special register scavenging spill slot is closest to the
1461  // frame pointer if a frame pointer is required.
1462  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
1463  if (RS && RegInfo->hasFP(Fn)) {
1464    int SFI = RS->getScavengingFrameIndex();
1465    if (SFI >= 0)
1466      AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
1467  }
1468
1469  // Make sure that the stack protector comes before the local variables on the
1470  // stack.
1471  if (FFI->getStackProtectorIndex() >= 0)
1472    AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown,
1473                      Offset, MaxAlign);
1474
1475  // Then assign frame offsets to stack objects that are not used to spill
1476  // callee saved registers.
1477  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
1478    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
1479      continue;
1480    if (RS && (int)i == RS->getScavengingFrameIndex())
1481      continue;
1482    if (FFI->isDeadObjectIndex(i))
1483      continue;
1484    if (FFI->getStackProtectorIndex() == (int)i)
1485      continue;
1486
1487    AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign);
1488  }
1489
1490  // Make sure the special register scavenging spill slot is closest to the
1491  // stack pointer.
1492  if (RS && !RegInfo->hasFP(Fn)) {
1493    int SFI = RS->getScavengingFrameIndex();
1494    if (SFI >= 0)
1495      AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
1496  }
1497
1498  // Round up the size to a multiple of the alignment, but only if there are
1499  // calls or alloca's in the function.  This ensures that any calls to
1500  // subroutines have their stack frames suitable aligned.
1501  // Also do this if we need runtime alignment of the stack.  In this case
1502  // offsets will be relative to SP not FP; round up the stack size so this
1503  // works.
1504  if (!RegInfo->targetHandlesStackFrameRounding() &&
1505      (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
1506       (RegInfo->needsStackRealignment(Fn) &&
1507        FFI->getObjectIndexEnd() != 0))) {
1508    // If we have reserved argument space for call sites in the function
1509    // immediately on entry to the current function, count it as part of the
1510    // overall stack size.
1511    if (RegInfo->hasReservedCallFrame(Fn))
1512      Offset += FFI->getMaxCallFrameSize();
1513
1514    unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1;
1515    Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
1516  }
1517
1518  // Update frame info to pretend that this is part of the stack...
1519  FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea());
1520
1521  // Remember the required stack alignment in case targets need it to perform
1522  // dynamic stack alignment.
1523  FFI->setMaxAlignment(MaxAlign);
1524}
1525
1526
1527/// insertPrologEpilogCode - Scan the function for modified callee saved
1528/// registers, insert spill code for these callee saved registers, then add
1529/// prolog and epilog code to the function.
1530///
1531void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
1532  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
1533
1534  // Add prologue to the function...
1535  TRI->emitPrologue(Fn);
1536
1537  // Add epilogue to restore the callee-save registers in each exiting block
1538  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
1539    // If last instruction is a return instruction, add an epilogue
1540    if (!I->empty() && I->back().getDesc().isReturn())
1541      TRI->emitEpilogue(Fn, *I);
1542  }
1543}
1544
1545
1546/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
1547/// register references and actual offsets.
1548///
1549void PEI::replaceFrameIndices(MachineFunction &Fn) {
1550  if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
1551
1552  const TargetMachine &TM = Fn.getTarget();
1553  assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
1554  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
1555  const TargetFrameInfo *TFI = TM.getFrameInfo();
1556  bool StackGrowsDown =
1557    TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
1558  int FrameSetupOpcode   = TRI.getCallFrameSetupOpcode();
1559  int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
1560
1561  for (MachineFunction::iterator BB = Fn.begin(),
1562         E = Fn.end(); BB != E; ++BB) {
1563    int SPAdj = 0;  // SP offset due to call frame setup / destroy.
1564    if (RS) RS->enterBasicBlock(BB);
1565
1566    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
1567      if (I->getOpcode() == TargetInstrInfo::DECLARE) {
1568        // Ignore it.
1569        ++I;
1570        continue;
1571      }
1572
1573      if (I->getOpcode() == FrameSetupOpcode ||
1574          I->getOpcode() == FrameDestroyOpcode) {
1575        // Remember how much SP has been adjusted to create the call
1576        // frame.
1577        int Size = I->getOperand(0).getImm();
1578
1579        if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
1580            (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
1581          Size = -Size;
1582
1583        SPAdj += Size;
1584
1585        MachineBasicBlock::iterator PrevI = BB->end();
1586        if (I != BB->begin()) PrevI = prior(I);
1587        TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
1588
1589        // Visit the instructions created by eliminateCallFramePseudoInstr().
1590        if (PrevI == BB->end())
1591          I = BB->begin();     // The replaced instr was the first in the block.
1592        else
1593          I = next(PrevI);
1594        continue;
1595      }
1596
1597      MachineInstr *MI = I;
1598      bool DoIncr = true;
1599      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
1600        if (MI->getOperand(i).isFI()) {
1601          // Some instructions (e.g. inline asm instructions) can have
1602          // multiple frame indices and/or cause eliminateFrameIndex
1603          // to insert more than one instruction. We need the register
1604          // scavenger to go through all of these instructions so that
1605          // it can update its register information. We keep the
1606          // iterator at the point before insertion so that we can
1607          // revisit them in full.
1608          bool AtBeginning = (I == BB->begin());
1609          if (!AtBeginning) --I;
1610
1611          // If this instruction has a FrameIndex operand, we need to
1612          // use that target machine register info object to eliminate
1613          // it.
1614
1615          TRI.eliminateFrameIndex(MI, SPAdj, RS);
1616
1617          // Reset the iterator if we were at the beginning of the BB.
1618          if (AtBeginning) {
1619            I = BB->begin();
1620            DoIncr = false;
1621          }
1622
1623          MI = 0;
1624          break;
1625        }
1626
1627      if (DoIncr && I != BB->end()) ++I;
1628
1629      // Update register states.
1630      if (RS && MI) RS->forward(MI);
1631    }
1632
1633    assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
1634  }
1635}
1636
1637// Debugging methods for shrink wrapping.
1638#ifndef NDEBUG
1639/// findFastExitPath - debugging method used to detect functions
1640/// with at least one path from the entry block to a return block
1641/// directly or which has a very small number of edges.
1642///
1643void PEI::findFastExitPath() {
1644  if (! EntryBlock)
1645    return;
1646  // Fina a path from EntryBlock to any return block that does not branch:
1647  //        Entry
1648  //          |     ...
1649  //          v      |
1650  //         B1<-----+
1651  //          |
1652  //          v
1653  //       Return
1654  for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
1655         SE = EntryBlock->succ_end(); SI != SE; ++SI) {
1656    MachineBasicBlock* SUCC = *SI;
1657
1658    // Assume positive, disprove existence of fast path.
1659    HasFastExitPath = true;
1660
1661    // Check the immediate successors.
1662    if (isReturnBlock(SUCC)) {
1663      if (ShrinkWrapDebugging >= BasicInfo)
1664        DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
1665             << "->" << getBasicBlockName(SUCC) << "\n";
1666      break;
1667    }
1668    // Traverse df from SUCC, look for a branch block.
1669    std::string exitPath = getBasicBlockName(SUCC);
1670    for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
1671           BE = df_end(SUCC); BI != BE; ++BI) {
1672      MachineBasicBlock* SBB = *BI;
1673      // Reject paths with branch nodes.
1674      if (SBB->succ_size() > 1) {
1675        HasFastExitPath = false;
1676        break;
1677      }
1678      exitPath += "->" + getBasicBlockName(SBB);
1679    }
1680    if (HasFastExitPath) {
1681      if (ShrinkWrapDebugging >= BasicInfo)
1682        DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
1683             << "->" << exitPath << "\n";
1684      break;
1685    }
1686  }
1687}
1688
1689/// verifySpillRestorePlacement - check the current spill/restore
1690/// sets for safety. Attempt to find spills without restores or
1691/// restores without spills.
1692/// Spills: walk df from each MBB in spill set ensuring that
1693///         all CSRs spilled at MMBB are restored on all paths
1694///         from MBB to all exit blocks.
1695/// Restores: walk idf from each MBB in restore set ensuring that
1696///           all CSRs restored at MBB are spilled on all paths
1697///           reaching MBB.
1698///
1699void PEI::verifySpillRestorePlacement() {
1700  unsigned numReturnBlocks = 0;
1701  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1702       MBBI != MBBE; ++MBBI) {
1703    MachineBasicBlock* MBB = MBBI;
1704    if (isReturnBlock(MBB) || MBB->succ_size() == 0)
1705      ++numReturnBlocks;
1706  }
1707  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
1708         BE = CSRSave.end(); BI != BE; ++BI) {
1709    MachineBasicBlock* MBB = BI->first;
1710    CSRegSet spilled = BI->second;
1711    CSRegSet restored;
1712
1713    if (spilled.empty())
1714      continue;
1715
1716    DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1717         << stringifyCSRegSet(spilled)
1718         << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
1719         << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1720
1721    if (CSRRestore[MBB].intersects(spilled)) {
1722      restored |= (CSRRestore[MBB] & spilled);
1723    }
1724
1725    // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
1726    // we must find restores for all spills w/no intervening spills on all
1727    // paths from MBB to all return blocks.
1728    for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
1729           BE = df_end(MBB); BI != BE; ++BI) {
1730      MachineBasicBlock* SBB = *BI;
1731      if (SBB == MBB)
1732        continue;
1733      // Stop when we encounter spills of any CSRs spilled at MBB that
1734      // have not yet been seen to be restored.
1735      if (CSRSave[SBB].intersects(spilled) &&
1736          !restored.contains(CSRSave[SBB] & spilled))
1737        break;
1738      // Collect the CSRs spilled at MBB that are restored
1739      // at this DF successor of MBB.
1740      if (CSRRestore[SBB].intersects(spilled))
1741        restored |= (CSRRestore[SBB] & spilled);
1742      // If we are at a retun block, check that the restores
1743      // we have seen so far exhaust the spills at MBB, then
1744      // reset the restores.
1745      if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
1746        if (restored != spilled) {
1747          CSRegSet notRestored = (spilled - restored);
1748          DOUT << MF->getFunction()->getName() << ": "
1749               << stringifyCSRegSet(notRestored)
1750               << " spilled at " << getBasicBlockName(MBB)
1751               << " are never restored on path to return "
1752               << getBasicBlockName(SBB) << "\n";
1753        }
1754        restored.clear();
1755      }
1756    }
1757  }
1758
1759  // Check restore placements.
1760  for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
1761         BE = CSRRestore.end(); BI != BE; ++BI) {
1762    MachineBasicBlock* MBB = BI->first;
1763    CSRegSet restored = BI->second;
1764    CSRegSet spilled;
1765
1766    if (restored.empty())
1767      continue;
1768
1769    DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1770         << stringifyCSRegSet(CSRSave[MBB])
1771         << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
1772         << stringifyCSRegSet(restored) << "\n";
1773
1774    if (CSRSave[MBB].intersects(restored)) {
1775      spilled |= (CSRSave[MBB] & restored);
1776    }
1777    // Walk inverse depth first from MBB to find spills of all
1778    // CSRs restored at MBB:
1779    for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
1780           BE = idf_end(MBB); BI != BE; ++BI) {
1781      MachineBasicBlock* PBB = *BI;
1782      if (PBB == MBB)
1783        continue;
1784      // Stop when we encounter restores of any CSRs restored at MBB that
1785      // have not yet been seen to be spilled.
1786      if (CSRRestore[PBB].intersects(restored) &&
1787          !spilled.contains(CSRRestore[PBB] & restored))
1788        break;
1789      // Collect the CSRs restored at MBB that are spilled
1790      // at this DF predecessor of MBB.
1791      if (CSRSave[PBB].intersects(restored))
1792        spilled |= (CSRSave[PBB] & restored);
1793    }
1794    if (spilled != restored) {
1795      CSRegSet notSpilled = (restored - spilled);
1796      DOUT << MF->getFunction()->getName() << ": "
1797           << stringifyCSRegSet(notSpilled)
1798           << " restored at " << getBasicBlockName(MBB)
1799           << " are never spilled\n";
1800    }
1801  }
1802}
1803
1804// Debugging print methods.
1805std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
1806  std::ostringstream name;
1807  if (MBB) {
1808    if (MBB->getBasicBlock())
1809      name << MBB->getBasicBlock()->getName();
1810    else
1811      name << "_MBB_" << MBB->getNumber();
1812  }
1813  return name.str();
1814}
1815
1816std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
1817  const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
1818  const std::vector<CalleeSavedInfo> CSI =
1819    MF->getFrameInfo()->getCalleeSavedInfo();
1820
1821  std::ostringstream srep;
1822  if (CSI.size() == 0) {
1823    srep << "[]";
1824    return srep.str();
1825  }
1826  srep << "[";
1827  CSRegSet::iterator I = s.begin(), E = s.end();
1828  if (I != E) {
1829    unsigned reg = CSI[*I].getReg();
1830    srep << TRI->getName(reg);
1831    for (++I; I != E; ++I) {
1832      reg = CSI[*I].getReg();
1833      srep << ",";
1834      srep << TRI->getName(reg);
1835    }
1836  }
1837  srep << "]";
1838  return srep.str();
1839}
1840
1841void PEI::dumpSet(const CSRegSet& s) {
1842  DOUT << stringifyCSRegSet(s) << "\n";
1843}
1844
1845void PEI::dumpUsed(MachineBasicBlock* MBB) {
1846  if (MBB) {
1847    DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
1848         << stringifyCSRegSet(CSRUsed[MBB])  << "\n";
1849  }
1850}
1851
1852void PEI::dumpAllUsed() {
1853    for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1854         MBBI != MBBE; ++MBBI) {
1855      MachineBasicBlock* MBB = MBBI;
1856      dumpUsed(MBB);
1857    }
1858}
1859
1860void PEI::dumpSets(MachineBasicBlock* MBB) {
1861  if (MBB) {
1862    DOUT << getBasicBlockName(MBB)           << " | "
1863         << stringifyCSRegSet(CSRUsed[MBB])  << " | "
1864         << stringifyCSRegSet(AnticIn[MBB])  << " | "
1865         << stringifyCSRegSet(AnticOut[MBB]) << " | "
1866         << stringifyCSRegSet(AvailIn[MBB])  << " | "
1867         << stringifyCSRegSet(AvailOut[MBB]) << "\n";
1868  }
1869}
1870
1871void PEI::dumpSets1(MachineBasicBlock* MBB) {
1872  if (MBB) {
1873    DOUT << getBasicBlockName(MBB)             << " | "
1874         << stringifyCSRegSet(CSRUsed[MBB])    << " | "
1875         << stringifyCSRegSet(AnticIn[MBB])    << " | "
1876         << stringifyCSRegSet(AnticOut[MBB])   << " | "
1877         << stringifyCSRegSet(AvailIn[MBB])    << " | "
1878         << stringifyCSRegSet(AvailOut[MBB])   << " | "
1879         << stringifyCSRegSet(CSRSave[MBB])    << " | "
1880         << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1881  }
1882}
1883
1884void PEI::dumpAllSets() {
1885    for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1886         MBBI != MBBE; ++MBBI) {
1887      MachineBasicBlock* MBB = MBBI;
1888      dumpSets1(MBB);
1889    }
1890}
1891
1892void PEI::dumpSRSets() {
1893  for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
1894       MBB != E; ++MBB) {
1895    if (! CSRSave[MBB].empty()) {
1896      DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1897           << stringifyCSRegSet(CSRSave[MBB]);
1898      if (CSRRestore[MBB].empty())
1899        DOUT << "\n";
1900    }
1901    if (! CSRRestore[MBB].empty()) {
1902      if (! CSRSave[MBB].empty())
1903        DOUT << "    ";
1904      DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
1905           << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1906    }
1907  }
1908}
1909#endif
1910