PrologEpilogInserter.cpp revision 714cea3af986b10a529df5d8f370530b25cbaaab
1//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass is responsible for finalizing the functions frame layout, saving
11// callee saved registers, and for emitting prolog & epilog code for the
12// function.
13//
14// This pass must be run after register allocation.  After this pass is
15// executed, it is illegal to construct MO_FrameIndex operands.
16//
17// This pass implements a shrink wrapping variant of prolog/epilog insertion:
18// - Places callee saved register (CSR) spills and restores in the CFG to
19//   tightly surround uses so that execution paths that do not use CSRs do not
20//   pay the spill/restore penalty.
21//
22// - Avoiding placment of spills/restores in loops: if a CSR is used inside a
23//   loop(nest), the spills are placed in the loop preheader, and restores are
24//   placed in the loop exit nodes (the successors of the loop _exiting_ nodes).
25//
26// - Covering paths without CSR uses: e.g. if a restore is placed in a join
27//   block, a matching spill is added to the end of all immediate predecessor
28//   blocks that are not reached by a spill. Similarly for saves placed in
29//   branch blocks.
30//
31// Shrink wrapping uses an analysis similar to the one in GVNPRE to determine
32// which basic blocks require callee-saved register save/restore code.
33//
34// This pass uses MachineDominators and MachineLoopInfo. Loop information
35// is used to prevent shrink wrapping of callee-saved register save/restore
36// code into loops.
37//
38//===----------------------------------------------------------------------===//
39
40#define DEBUG_TYPE "shrink-wrap"
41
42#include "llvm/CodeGen/Passes.h"
43#include "llvm/CodeGen/MachineDominators.h"
44#include "llvm/CodeGen/MachineLoopInfo.h"
45#include "llvm/CodeGen/MachineFunctionPass.h"
46#include "llvm/CodeGen/MachineInstr.h"
47#include "llvm/CodeGen/MachineFrameInfo.h"
48#include "llvm/CodeGen/MachineModuleInfo.h"
49#include "llvm/CodeGen/MachineRegisterInfo.h"
50#include "llvm/CodeGen/RegisterScavenging.h"
51#include "llvm/Target/TargetMachine.h"
52#include "llvm/Target/TargetRegisterInfo.h"
53#include "llvm/Target/TargetFrameInfo.h"
54#include "llvm/Target/TargetInstrInfo.h"
55#include "llvm/ADT/SparseBitVector.h"
56#include "llvm/ADT/DenseMap.h"
57#include "llvm/ADT/PostOrderIterator.h"
58#include "llvm/ADT/Statistic.h"
59#include "llvm/Support/CommandLine.h"
60#include "llvm/Support/Compiler.h"
61#include "llvm/Support/Debug.h"
62#include "llvm/ADT/STLExtras.h"
63#include "llvm/ADT/Statistic.h"
64#include <climits>
65#include <sstream>
66
67using namespace llvm;
68
69STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
70
71// Shrink Wrapping:
72static cl::opt<bool>
73ShrinkWrapping("shrink-wrap",
74               cl::desc("Shrink wrap callee-saved register spills/restores"));
75
76// Shrink wrap only the specified function, a debugging aid.
77static cl::opt<std::string>
78ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
79               cl::desc("Shrink wrap the specified function"),
80               cl::value_desc("funcname"),
81               cl::init(""));
82
83// Debugging level for shrink wrapping.
84enum ShrinkWrapDebugLevel {
85  None, BasicInfo, Iterations, Details
86};
87
88static cl::opt<enum ShrinkWrapDebugLevel>
89ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
90  cl::desc("Print shrink wrapping debugging information"),
91  cl::values(
92    clEnumVal(None      , "disable debug output"),
93    clEnumVal(BasicInfo , "print basic DF sets"),
94    clEnumVal(Iterations, "print SR sets for each iteration"),
95    clEnumVal(Details   , "print all DF sets"),
96    clEnumValEnd));
97
98
99namespace {
100  struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass {
101    static char ID;
102    PEI() : MachineFunctionPass(&ID) {}
103
104    const char *getPassName() const {
105      return "Prolog/Epilog Insertion & Frame Finalization";
106    }
107
108    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
109      AU.setPreservesCFG();
110      if (ShrinkWrapping || ShrinkWrapFunc != "") {
111        AU.addRequired<MachineLoopInfo>();
112        AU.addRequired<MachineDominatorTree>();
113      }
114      AU.addPreserved<MachineLoopInfo>();
115      AU.addPreserved<MachineDominatorTree>();
116      MachineFunctionPass::getAnalysisUsage(AU);
117    }
118
119    /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
120    /// frame indexes with appropriate references.
121    ///
122    bool runOnMachineFunction(MachineFunction &Fn) {
123      const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
124      RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
125
126      DEBUG(MF = &Fn);
127
128      // Get MachineModuleInfo so that we can track the construction of the
129      // frame.
130      if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>())
131        Fn.getFrameInfo()->setMachineModuleInfo(MMI);
132
133      // Allow the target machine to make some adjustments to the function
134      // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
135      TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
136
137      // Scan the function for modified callee saved registers and insert spill
138      // code for any callee saved registers that are modified.  Also calculate
139      // the MaxCallFrameSize and HasCalls variables for the function's frame
140      // information and eliminates call frame pseudo instructions.
141      calculateCalleeSavedRegisters(Fn);
142
143      // Determine placement of CSR spill/restore code:
144      //  - with shrink wrapping, place spills and restores to tightly
145      //    enclose regions in the Machine CFG of the function where
146      //    they are used. Without shrink wrapping
147      //  - default (no shrink wrapping), place all spills in the
148      //    entry block, all restores in return blocks.
149      placeCSRSpillsAndRestores(Fn);
150
151      // Add the code to save and restore the callee saved registers
152      insertCSRSpillsAndRestores(Fn);
153
154      // Allow the target machine to make final modifications to the function
155      // before the frame layout is finalized.
156      TRI->processFunctionBeforeFrameFinalized(Fn);
157
158      // Calculate actual frame offsets for all abstract stack objects...
159      calculateFrameObjectOffsets(Fn);
160
161      // Add prolog and epilog code to the function.  This function is required
162      // to align the stack frame as necessary for any stack variables or
163      // called functions.  Because of this, calculateCalleeSavedRegisters
164      // must be called before this function in order to set the HasCalls
165      // and MaxCallFrameSize variables.
166      insertPrologEpilogCode(Fn);
167
168      // Replace all MO_FrameIndex operands with physical register references
169      // and actual offsets.
170      //
171      replaceFrameIndices(Fn);
172
173      delete RS;
174      clearAllSets();
175      return true;
176    }
177
178  private:
179    RegScavenger *RS;
180
181    // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
182    // stack frame indexes.
183    unsigned MinCSFrameIndex, MaxCSFrameIndex;
184
185    // Analysis info for spill/restore placement.
186    // "CSR": "callee saved register".
187
188    // CSRegSet contains indices into the Callee Saved Register Info
189    // vector built by calculateCalleeSavedRegisters() and accessed
190    // via MF.getFrameInfo()->getCalleeSavedInfo().
191    typedef SparseBitVector<> CSRegSet;
192
193    // CSRegBlockMap maps MachineBasicBlocks to sets of callee
194    // saved register indices.
195    typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
196
197    // Set and maps for computing CSR spill/restore placement:
198    //  used in function (UsedCSRegs)
199    //  used in a basic block (CSRUsed)
200    //  anticipatable in a basic block (Antic{In,Out})
201    //  available in a basic block (Avail{In,Out})
202    //  to be spilled at the entry to a basic block (CSRSave)
203    //  to be restored at the end of a basic block (CSRRestore)
204    CSRegSet UsedCSRegs;
205    CSRegBlockMap CSRUsed;
206    CSRegBlockMap AnticIn, AnticOut;
207    CSRegBlockMap AvailIn, AvailOut;
208    CSRegBlockMap CSRSave;
209    CSRegBlockMap CSRRestore;
210
211    // Entry and return blocks of the current function.
212    MachineBasicBlock* EntryBlock;
213    SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
214
215    // Map of MBBs to top level MachineLoops.
216    DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
217
218    // Flag to control shrink wrapping per-function:
219    // may choose to skip shrink wrapping for certain
220    // functions.
221    bool ShrinkWrapThisFunction;
222
223#ifndef NDEBUG
224    // Machine function handle.
225    MachineFunction* MF;
226
227    // Flag indicating that the current function
228    // has at least one "short" path in the machine
229    // CFG from the entry block to an exit block.
230    bool HasFastExitPath;
231#endif
232
233    bool calculateSets(MachineFunction &Fn);
234    bool calcAnticInOut(MachineBasicBlock* MBB);
235    bool calcAvailInOut(MachineBasicBlock* MBB);
236    void calculateAnticAvail(MachineFunction &Fn);
237    bool addUsesForMEMERegion(MachineBasicBlock* MBB,
238                              SmallVector<MachineBasicBlock*, 4>& blks);
239    bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
240    bool calcSpillPlacements(MachineBasicBlock* MBB,
241                             SmallVector<MachineBasicBlock*, 4> &blks,
242                             CSRegBlockMap &prevSpills);
243    bool calcRestorePlacements(MachineBasicBlock* MBB,
244                               SmallVector<MachineBasicBlock*, 4> &blks,
245                               CSRegBlockMap &prevRestores);
246    void placeSpillsAndRestores(MachineFunction &Fn);
247    void placeCSRSpillsAndRestores(MachineFunction &Fn);
248    void calculateCalleeSavedRegisters(MachineFunction &Fn);
249    void insertCSRSpillsAndRestores(MachineFunction &Fn);
250    void calculateFrameObjectOffsets(MachineFunction &Fn);
251    void replaceFrameIndices(MachineFunction &Fn);
252    void insertPrologEpilogCode(MachineFunction &Fn);
253
254    // Initialize DFA sets, called before iterations.
255    void clearAnticAvailSets();
256    // Clear all sets constructed by shrink wrapping.
257    void clearAllSets();
258
259    // Initialize all shrink wrapping data.
260    void initShrinkWrappingInfo();
261
262    // Convienences for dealing with machine loops.
263    MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP) {
264      assert(LP && "Machine loop is NULL.");
265      MachineBasicBlock* PHDR = LP->getLoopPreheader();
266      MachineLoop* PLP = LP->getParentLoop();
267      while (PLP) {
268        PHDR = PLP->getLoopPreheader();
269        PLP = PLP->getParentLoop();
270      }
271      return PHDR;
272    }
273
274    MachineLoop* getTopLevelLoopParent(MachineLoop *LP) {
275      if (LP == 0)
276        return 0;
277      MachineLoop* PLP = LP->getParentLoop();
278      while (PLP) {
279        LP = PLP;
280        PLP = PLP->getParentLoop();
281      }
282      return LP;
283    }
284
285    // Propgate CSRs used in MBB to all MBBs of loop LP.
286    void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
287
288    // Convenience for recognizing return blocks.
289    bool isReturnBlock(MachineBasicBlock* MBB) {
290      return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
291    }
292
293#ifndef NDEBUG
294    // Debugging methods.
295
296    // Mark this function as having fast exit paths.
297    void findFastExitPath();
298
299    // Verify placement of spills/restores.
300    void verifySpillRestorePlacement();
301
302    std::string getBasicBlockName(const MachineBasicBlock* MBB);
303    std::string stringifyCSRegSet(const CSRegSet& s);
304    void dumpSet(const CSRegSet& s);
305    void dumpUsed(MachineBasicBlock* MBB);
306    void dumpAllUsed();
307    void dumpSets(MachineBasicBlock* MBB);
308    void dumpSets1(MachineBasicBlock* MBB);
309    void dumpAllSets();
310    void dumpSRSets();
311#endif
312
313  };
314  char PEI::ID = 0;
315}
316
317// Initialize shrink wrapping DFA sets, called before iterations.
318void PEI::clearAnticAvailSets() {
319  AnticIn.clear();
320  AnticOut.clear();
321  AvailIn.clear();
322  AvailOut.clear();
323}
324
325// Clear all sets constructed by shrink wrapping.
326void PEI::clearAllSets() {
327  ReturnBlocks.clear();
328  clearAnticAvailSets();
329  UsedCSRegs.clear();
330  CSRUsed.clear();
331  TLLoops.clear();
332  CSRSave.clear();
333  CSRRestore.clear();
334}
335
336// Initialize all shrink wrapping data.
337void PEI::initShrinkWrappingInfo() {
338  clearAllSets();
339  EntryBlock = 0;
340#ifndef NDEBUG
341  HasFastExitPath = false;
342#endif
343  ShrinkWrapThisFunction = ShrinkWrapping;
344  // DEBUG: enable or disable shrink wrapping for the current function
345  // via --shrink-wrap-func=<funcname>.
346#ifndef NDEBUG
347  if (ShrinkWrapFunc != "") {
348    std::string MFName = MF->getFunction()->getName();
349    ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
350  }
351#endif
352}
353
354
355/// createPrologEpilogCodeInserter - This function returns a pass that inserts
356/// prolog and epilog code, and eliminates abstract frame references.
357///
358FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
359
360/// placeCSRSpillsAndRestores - determine which MBBs of the function
361/// need save, restore code for callee-saved registers by doing a DF analysis
362/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
363/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
364/// is used to ensure that CSR save/restore code is not placed inside loops.
365/// This function computes the maps of MBBs -> CSRs to spill and restore
366/// in CSRSave, CSRRestore.
367///
368/// If shrink wrapping is not being performed, place all spills in
369/// the entry block, all restores in return blocks. In this case,
370/// CSRSave has a single mapping, CSRRestore has mappings for each
371/// return block.
372///
373void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
374
375  initShrinkWrappingInfo();
376
377  DEBUG(if (ShrinkWrapThisFunction) {
378      DOUT << "Place CSR spills/restores for "
379           << MF->getFunction()->getName() << "\n";
380    });
381
382  if (calculateSets(Fn))
383    placeSpillsAndRestores(Fn);
384}
385
386/// calcAnticInOut - calculate the anticipated in/out reg sets
387/// for the given MBB by looking forward in the MCFG at MBB's
388/// successors.
389///
390bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
391  bool changed = false;
392
393  // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
394  SmallVector<MachineBasicBlock*, 4> successors;
395  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
396         SE = MBB->succ_end(); SI != SE; ++SI) {
397    MachineBasicBlock* SUCC = *SI;
398    if (SUCC != MBB)
399      successors.push_back(SUCC);
400  }
401
402  unsigned i = 0, e = successors.size();
403  if (i != e) {
404    CSRegSet prevAnticOut = AnticOut[MBB];
405    MachineBasicBlock* SUCC = successors[i];
406
407    AnticOut[MBB] = AnticIn[SUCC];
408    for (++i; i != e; ++i) {
409      SUCC = successors[i];
410      AnticOut[MBB] &= AnticIn[SUCC];
411    }
412    if (prevAnticOut != AnticOut[MBB])
413      changed = true;
414  }
415
416  // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
417  CSRegSet prevAnticIn = AnticIn[MBB];
418  AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
419  if (prevAnticIn |= AnticIn[MBB])
420    changed = true;
421  return changed;
422}
423
424/// calcAvailInOut - calculate the available in/out reg sets
425/// for the given MBB by looking backward in the MCFG at MBB's
426/// predecessors.
427///
428bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
429  bool changed = false;
430
431  // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
432  SmallVector<MachineBasicBlock*, 4> predecessors;
433  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
434         PE = MBB->pred_end(); PI != PE; ++PI) {
435    MachineBasicBlock* PRED = *PI;
436    if (PRED != MBB)
437      predecessors.push_back(PRED);
438  }
439
440  unsigned i = 0, e = predecessors.size();
441  if (i != e) {
442    CSRegSet prevAvailIn = AvailIn[MBB];
443    MachineBasicBlock* PRED = predecessors[i];
444
445    AvailIn[MBB] = AvailOut[PRED];
446    for (++i; i != e; ++i) {
447      PRED = predecessors[i];
448      AvailIn[MBB] &= AvailOut[PRED];
449    }
450    if (prevAvailIn != AvailIn[MBB])
451      changed = true;
452  }
453
454  // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
455  CSRegSet prevAvailOut = AvailOut[MBB];
456  AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
457  if (prevAvailOut |= AvailOut[MBB])
458    changed = true;
459  return changed;
460}
461
462/// calculateAnticAvail - build the sets anticipated and available
463/// registers in the MCFG of the current function iteratively,
464/// doing a combined forward and backward analysis.
465///
466void PEI::calculateAnticAvail(MachineFunction &Fn) {
467  // Initialize data flow sets.
468  clearAnticAvailSets();
469
470  // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
471  bool changed = true;
472  unsigned iterations = 0;
473  while (changed) {
474    changed = false;
475    ++iterations;
476    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
477         MBBI != MBBE; ++MBBI) {
478      MachineBasicBlock* MBB = MBBI;
479
480      // Calculate anticipated in, out regs at MBB from
481      // anticipated at successors of MBB.
482      changed |= calcAnticInOut(MBB);
483
484      // Calculate available in, out regs at MBB from
485      // available at predecessors of MBB.
486      changed |= calcAvailInOut(MBB);
487    }
488  }
489
490  DEBUG(if (ShrinkWrapDebugging >= Details) {
491      DOUT << "-----------------------------------------------------------\n";
492      DOUT << " Antic/Avail Sets:\n";
493      DOUT << "-----------------------------------------------------------\n";
494      DOUT << "iterations = " << iterations << "\n";
495      DOUT << "-----------------------------------------------------------\n";
496      DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n";
497      DOUT << "-----------------------------------------------------------\n";
498      for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
499           MBBI != MBBE; ++MBBI) {
500        MachineBasicBlock* MBB = MBBI;
501        dumpSets(MBB);
502      }
503      DOUT << "-----------------------------------------------------------\n";
504    });
505}
506
507/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
508/// of the loop given by LP and its parent loops. This prevents spills/restores
509/// from being placed in the bodies of loops.
510///
511void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
512  if (! MBB || !LP)
513    return;
514
515  std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
516  for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
517    MachineBasicBlock* LBB = loopBlocks[i];
518    if (LBB == MBB)
519      continue;
520    if (CSRUsed[LBB].contains(CSRUsed[MBB]))
521      continue;
522    CSRUsed[LBB] |= CSRUsed[MBB];
523  }
524}
525
526/// calculateSets - collect the CSRs used in this function, compute
527/// the DF sets that describe the initial minimal regions in the
528/// Machine CFG around which CSR spills and restores must be placed.
529///
530/// Additionally, this function decides if shrink wrapping should
531/// be disabled for the current function, checking the following:
532///  1. the current function has more than 500 MBBs: heuristic limit
533///     on function size to reduce compile time impact of the current
534///     iterative algorithm.
535///  2. all CSRs are used in the entry block.
536///  3. all CSRs are used in all immediate successors of the entry block.
537///  4. all CSRs are used in a subset of blocks, each of which dominates
538///     all return blocks. These blocks, taken as a subgraph of the MCFG,
539///     are equivalent to the entry block since all execution paths pass
540///     through them.
541///
542bool PEI::calculateSets(MachineFunction &Fn) {
543  // Sets used to compute spill, restore placement sets.
544  const std::vector<CalleeSavedInfo> CSI =
545    Fn.getFrameInfo()->getCalleeSavedInfo();
546
547  // If no CSRs used, we are done.
548  if (CSI.empty()) {
549    DEBUG(if (ShrinkWrapThisFunction)
550            DOUT << "DISABLED: " << Fn.getFunction()->getName()
551                 << ": uses no callee-saved registers\n");
552    return false;
553  }
554
555  // Save refs to entry and return blocks.
556  EntryBlock = Fn.begin();
557  for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
558       MBB != E; ++MBB)
559    if (isReturnBlock(MBB))
560      ReturnBlocks.push_back(MBB);
561
562  // Determine if this function has fast exit paths.
563  DEBUG(if (ShrinkWrapThisFunction)
564          findFastExitPath());
565
566  // Limit shrink wrapping via the current iterative bit vector
567  // implementation to functions with <= 500 MBBs.
568  if (Fn.size() > 500) {
569    DEBUG(if (ShrinkWrapThisFunction)
570            DOUT << "DISABLED: " << Fn.getFunction()->getName()
571                 << ": too large (" << Fn.size() << " MBBs)\n");
572    ShrinkWrapThisFunction = false;
573  }
574
575  // Return now if not shrink wrapping.
576  if (! ShrinkWrapThisFunction)
577    return false;
578
579  // Collect set of used CSRs.
580  for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
581    UsedCSRegs.set(inx);
582  }
583
584  // Walk instructions in all MBBs, create CSRUsed[] sets, choose
585  // whether or not to shrink wrap this function.
586  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
587  MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
588  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
589
590  bool allCSRUsesInEntryBlock = true;
591  for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
592       MBBI != MBBE; ++MBBI) {
593    MachineBasicBlock* MBB = MBBI;
594    for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
595      for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
596        unsigned Reg = CSI[inx].getReg();
597        // If instruction I reads or modifies Reg, add it to UsedCSRegs,
598        // CSRUsed map for the current block.
599        for (unsigned opInx = 0, opEnd = I->getNumOperands();
600             opInx != opEnd; ++opInx) {
601          const MachineOperand &MO = I->getOperand(opInx);
602          if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
603            continue;
604          unsigned MOReg = MO.getReg();
605          if (!MOReg)
606            continue;
607          if (MOReg == Reg ||
608              (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
609               TargetRegisterInfo::isPhysicalRegister(Reg) &&
610               TRI->isSubRegister(Reg, MOReg))) {
611            // CSR Reg is defined/used in block MBB.
612            CSRUsed[MBB].set(inx);
613            // Check for uses in EntryBlock.
614            if (MBB != EntryBlock)
615              allCSRUsesInEntryBlock = false;
616          }
617        }
618      }
619    }
620
621    if (CSRUsed[MBB].empty())
622      continue;
623
624    // Propagate CSRUsed[MBB] in loops
625    if (MachineLoop* LP = LI.getLoopFor(MBB)) {
626      // Add top level loop to work list.
627      MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
628      MachineLoop* PLP = getTopLevelLoopParent(LP);
629
630      if (! HDR) {
631        HDR = PLP->getHeader();
632        assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
633        MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
634        HDR = *PI;
635      }
636      TLLoops[HDR] = PLP;
637
638      // Push uses from inside loop to its parent loops,
639      // or to all other MBBs in its loop.
640      if (LP->getLoopDepth() > 1) {
641        for (MachineLoop* PLP = LP->getParentLoop(); PLP;
642             PLP = PLP->getParentLoop()) {
643          propagateUsesAroundLoop(MBB, PLP);
644        }
645      } else {
646        propagateUsesAroundLoop(MBB, LP);
647      }
648    }
649  }
650
651  if (allCSRUsesInEntryBlock) {
652    DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
653          << ": all CSRs used in EntryBlock\n");
654    ShrinkWrapThisFunction = false;
655  } else {
656    bool allCSRsUsedInEntryFanout = true;
657    for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
658           SE = EntryBlock->succ_end(); SI != SE; ++SI) {
659      MachineBasicBlock* SUCC = *SI;
660      if (CSRUsed[SUCC] != UsedCSRegs)
661        allCSRsUsedInEntryFanout = false;
662    }
663    if (allCSRsUsedInEntryFanout) {
664      DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
665            << ": all CSRs used in imm successors of EntryBlock\n");
666      ShrinkWrapThisFunction = false;
667    }
668  }
669
670  if (ShrinkWrapThisFunction) {
671    // Check if MBB uses CSRs and dominates all exit nodes.
672    // Such nodes are equiv. to the entry node w.r.t.
673    // CSR uses: every path through the function must
674    // pass through this node. If each CSR is used at least
675    // once by these nodes, shrink wrapping is disabled.
676    CSRegSet CSRUsedInChokePoints;
677    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
678         MBBI != MBBE; ++MBBI) {
679      MachineBasicBlock* MBB = MBBI;
680      if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
681        continue;
682      bool dominatesExitNodes = true;
683      for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
684        if (! DT.dominates(MBB, ReturnBlocks[ri])) {
685          dominatesExitNodes = false;
686          break;
687        }
688      if (dominatesExitNodes) {
689        CSRUsedInChokePoints |= CSRUsed[MBB];
690        if (CSRUsedInChokePoints == UsedCSRegs) {
691          DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
692                << ": all CSRs used in choke point(s) at "
693                << getBasicBlockName(MBB) << "\n");
694          ShrinkWrapThisFunction = false;
695          break;
696        }
697      }
698    }
699  }
700
701  // Return now if we have decided not to apply shrink wrapping
702  // to the current function.
703  if (! ShrinkWrapThisFunction)
704    return false;
705
706  DEBUG({
707      DOUT << "ENABLED: " << Fn.getFunction()->getName();
708      if (HasFastExitPath)
709        DOUT << " (fast exit path)";
710      DOUT << "\n";
711      if (ShrinkWrapDebugging >= BasicInfo) {
712        DOUT << "------------------------------"
713             << "-----------------------------\n";
714        DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
715        if (ShrinkWrapDebugging >= Details) {
716          DOUT << "------------------------------"
717               << "-----------------------------\n";
718          dumpAllUsed();
719        }
720      }
721    });
722
723  // Build initial DF sets to determine minimal regions in the
724  // Machine CFG around which CSRs must be spilled and restored.
725  calculateAnticAvail(Fn);
726
727  return true;
728}
729
730/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
731/// multi-entry, multi-exit (MEME) regions so spill and restore
732/// placement will not break code that enters or leaves a
733/// shrink-wrapped region by inducing spills with no matching
734/// restores or restores with no matching spills. A MEME region
735/// is a subgraph of the MCFG with multiple entry edges, multiple
736/// exit edges, or both. This code propagates use information
737/// through the MCFG until all paths requiring spills and restores
738/// _outside_ the computed minimal placement regions have been covered.
739///
740bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
741                               SmallVector<MachineBasicBlock*, 4>& blks) {
742  if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
743    bool processThisBlock = false;
744    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
745           SE = MBB->succ_end(); SI != SE; ++SI) {
746      MachineBasicBlock* SUCC = *SI;
747      if (SUCC->pred_size() > 1) {
748        processThisBlock = true;
749        break;
750      }
751    }
752    if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
753      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
754             PE = MBB->pred_end(); PI != PE; ++PI) {
755        MachineBasicBlock* PRED = *PI;
756        if (PRED->succ_size() > 1) {
757          processThisBlock = true;
758          break;
759        }
760      }
761    }
762    if (! processThisBlock)
763      return false;
764  }
765
766  CSRegSet prop;
767  if (!CSRSave[MBB].empty())
768    prop = CSRSave[MBB];
769  else if (!CSRRestore[MBB].empty())
770    prop = CSRRestore[MBB];
771  else
772    prop = CSRUsed[MBB];
773  if (prop.empty())
774    return false;
775
776  // Propagate selected bits to successors, predecessors of MBB.
777  bool addedUses = false;
778  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
779         SE = MBB->succ_end(); SI != SE; ++SI) {
780    MachineBasicBlock* SUCC = *SI;
781    // Self-loop
782    if (SUCC == MBB)
783      continue;
784    if (! CSRUsed[SUCC].contains(prop)) {
785      CSRUsed[SUCC] |= prop;
786      addedUses = true;
787      blks.push_back(SUCC);
788      DEBUG(if (ShrinkWrapDebugging >= Iterations)
789              DOUT << getBasicBlockName(MBB)
790                   << "(" << stringifyCSRegSet(prop) << ")->"
791                   << "successor " << getBasicBlockName(SUCC) << "\n");
792    }
793  }
794  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
795         PE = MBB->pred_end(); PI != PE; ++PI) {
796    MachineBasicBlock* PRED = *PI;
797    // Self-loop
798    if (PRED == MBB)
799      continue;
800    if (! CSRUsed[PRED].contains(prop)) {
801      CSRUsed[PRED] |= prop;
802      addedUses = true;
803      blks.push_back(PRED);
804      DEBUG(if (ShrinkWrapDebugging >= Iterations)
805              DOUT << getBasicBlockName(MBB)
806                   << "(" << stringifyCSRegSet(prop) << ")->"
807                   << "predecessor " << getBasicBlockName(PRED) << "\n");
808    }
809  }
810  return addedUses;
811}
812
813/// addUsesForTopLevelLoops - add uses for CSRs used inside top
814/// level loops to the exit blocks of those loops.
815///
816bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
817  bool addedUses = false;
818
819  // Place restores for top level loops where needed.
820  for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
821         I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
822    MachineBasicBlock* MBB = I->first;
823    MachineLoop* LP = I->second;
824    MachineBasicBlock* HDR = LP->getHeader();
825    SmallVector<MachineBasicBlock*, 4> exitBlocks;
826    CSRegSet loopSpills;
827
828    loopSpills = CSRSave[MBB];
829    if (CSRSave[MBB].empty()) {
830      loopSpills = CSRUsed[HDR];
831      assert(!loopSpills.empty() && "No CSRs used in loop?");
832    } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
833      continue;
834
835    LP->getExitBlocks(exitBlocks);
836    assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
837    for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
838      MachineBasicBlock* EXB = exitBlocks[i];
839      if (! CSRUsed[EXB].contains(loopSpills)) {
840        CSRUsed[EXB] |= loopSpills;
841        addedUses = true;
842        DEBUG(if (ShrinkWrapDebugging >= Iterations)
843                DOUT << "LOOP " << getBasicBlockName(MBB)
844                     << "(" << stringifyCSRegSet(loopSpills) << ")->"
845                     << getBasicBlockName(EXB) << "\n");
846        if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
847          blks.push_back(EXB);
848      }
849    }
850  }
851  return addedUses;
852}
853
854/// calcSpillPlacements - determine which CSRs should be spilled
855/// in MBB using AnticIn sets of MBB's predecessors, keeping track
856/// of changes to spilled reg sets. Add MBB to the set of blocks
857/// that need to be processed for propagating use info to cover
858/// multi-entry/exit regions.
859///
860bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
861                              SmallVector<MachineBasicBlock*, 4> &blks,
862                              CSRegBlockMap &prevSpills) {
863  bool placedSpills = false;
864  // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
865  CSRegSet anticInPreds;
866  SmallVector<MachineBasicBlock*, 4> predecessors;
867  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
868         PE = MBB->pred_end(); PI != PE; ++PI) {
869    MachineBasicBlock* PRED = *PI;
870    if (PRED != MBB)
871      predecessors.push_back(PRED);
872  }
873  unsigned i = 0, e = predecessors.size();
874  if (i != e) {
875    MachineBasicBlock* PRED = predecessors[i];
876    anticInPreds = UsedCSRegs - AnticIn[PRED];
877    for (++i; i != e; ++i) {
878      PRED = predecessors[i];
879      anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
880    }
881  } else {
882    // Handle uses in entry blocks (which have no predecessors).
883    // This is necessary because the DFA formulation assumes the
884    // entry and (multiple) exit nodes cannot have CSR uses, which
885    // is not the case in the real world.
886    anticInPreds = UsedCSRegs;
887  }
888  // Compute spills required at MBB:
889  CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
890
891  if (! CSRSave[MBB].empty()) {
892    if (MBB == EntryBlock) {
893      for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
894        CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
895    } else {
896      // Reset all regs spilled in MBB that are also spilled in EntryBlock.
897      if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
898        CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
899      }
900    }
901  }
902  placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
903  prevSpills[MBB] = CSRSave[MBB];
904  // Remember this block for adding restores to successor
905  // blocks for multi-entry region.
906  if (placedSpills)
907    blks.push_back(MBB);
908
909  DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
910          DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
911               << stringifyCSRegSet(CSRSave[MBB]) << "\n");
912
913  return placedSpills;
914}
915
916/// calcRestorePlacements - determine which CSRs should be restored
917/// in MBB using AvailOut sets of MBB's succcessors, keeping track
918/// of changes to restored reg sets. Add MBB to the set of blocks
919/// that need to be processed for propagating use info to cover
920/// multi-entry/exit regions.
921///
922bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
923                                SmallVector<MachineBasicBlock*, 4> &blks,
924                                CSRegBlockMap &prevRestores) {
925  bool placedRestores = false;
926  // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
927  CSRegSet availOutSucc;
928  SmallVector<MachineBasicBlock*, 4> successors;
929  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
930         SE = MBB->succ_end(); SI != SE; ++SI) {
931    MachineBasicBlock* SUCC = *SI;
932    if (SUCC != MBB)
933      successors.push_back(SUCC);
934  }
935  unsigned i = 0, e = successors.size();
936  if (i != e) {
937    MachineBasicBlock* SUCC = successors[i];
938    availOutSucc = UsedCSRegs - AvailOut[SUCC];
939    for (++i; i != e; ++i) {
940      SUCC = successors[i];
941      availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
942    }
943  } else {
944    if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
945      // Handle uses in return blocks (which have no successors).
946      // This is necessary because the DFA formulation assumes the
947      // entry and (multiple) exit nodes cannot have CSR uses, which
948      // is not the case in the real world.
949      availOutSucc = UsedCSRegs;
950    }
951  }
952  // Compute restores required at MBB:
953  CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
954
955  // Postprocess restore placements at MBB.
956  // Remove the CSRs that are restored in the return blocks.
957  // Lest this be confusing, note that:
958  // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
959  if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
960    if (! CSRSave[EntryBlock].empty())
961      CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
962  }
963  placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
964  prevRestores[MBB] = CSRRestore[MBB];
965  // Remember this block for adding saves to predecessor
966  // blocks for multi-entry region.
967  if (placedRestores)
968    blks.push_back(MBB);
969
970  DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
971          DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
972               << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
973
974  return placedRestores;
975}
976
977/// placeSpillsAndRestores - place spills and restores of CSRs
978/// used in MBBs in minimal regions that contain the uses.
979///
980void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
981  CSRegBlockMap prevCSRSave;
982  CSRegBlockMap prevCSRRestore;
983  SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
984  bool changed = true;
985  unsigned iterations = 0;
986
987  // Iterate computation of spill and restore placements in the MCFG until:
988  //   1. CSR use info has been fully propagated around the MCFG, and
989  //   2. computation of CSRSave[], CSRRestore[] reach fixed points.
990  while (changed) {
991    changed = false;
992    ++iterations;
993
994    DEBUG(if (ShrinkWrapDebugging >= Iterations)
995            DOUT << "iter " << iterations
996                 << " --------------------------------------------------\n");
997
998    // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
999    // which determines the placements of spills and restores.
1000    // Keep track of changes to spills, restores in each iteration to
1001    // minimize the total iterations.
1002    bool SRChanged = false;
1003    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
1004         MBBI != MBBE; ++MBBI) {
1005      MachineBasicBlock* MBB = MBBI;
1006
1007      // Place spills for CSRs in MBB.
1008      SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
1009
1010      // Place restores for CSRs in MBB.
1011      SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
1012    }
1013
1014    // Add uses of CSRs used inside loops where needed.
1015    changed |= addUsesForTopLevelLoops(cvBlocks);
1016
1017    // Add uses for CSRs spilled or restored at branch, join points.
1018    if (changed || SRChanged) {
1019      while (! cvBlocks.empty()) {
1020        MachineBasicBlock* MBB = cvBlocks.pop_back_val();
1021        changed |= addUsesForMEMERegion(MBB, ncvBlocks);
1022      }
1023      if (! ncvBlocks.empty()) {
1024        cvBlocks = ncvBlocks;
1025        ncvBlocks.clear();
1026      }
1027    }
1028
1029    if (changed) {
1030      calculateAnticAvail(Fn);
1031      CSRSave.clear();
1032      CSRRestore.clear();
1033    }
1034  }
1035
1036  // Check for effectiveness:
1037  //  SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
1038  //  numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
1039  // Gives a measure of how many CSR spills have been moved from EntryBlock
1040  // to minimal regions enclosing their uses.
1041  CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
1042  unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
1043  numSRReduced += numSRReducedThisFunc;
1044  DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
1045      DOUT << "-----------------------------------------------------------\n";
1046      DOUT << "total iterations = " << iterations << " ( "
1047           << Fn.getFunction()->getName()
1048           << " " << numSRReducedThisFunc
1049           << " " << Fn.size()
1050           << " )\n";
1051      DOUT << "-----------------------------------------------------------\n";
1052      dumpSRSets();
1053      DOUT << "-----------------------------------------------------------\n";
1054      if (numSRReducedThisFunc)
1055        verifySpillRestorePlacement();
1056    });
1057}
1058
1059/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
1060/// registers.  Also calculate the MaxCallFrameSize and HasCalls variables for
1061/// the function's frame information and eliminates call frame pseudo
1062/// instructions.
1063///
1064void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
1065  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
1066  const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
1067
1068  // Get the callee saved register list...
1069  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
1070
1071  // Get the function call frame set-up and tear-down instruction opcode
1072  int FrameSetupOpcode   = RegInfo->getCallFrameSetupOpcode();
1073  int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
1074
1075  // These are used to keep track the callee-save area. Initialize them.
1076  MinCSFrameIndex = INT_MAX;
1077  MaxCSFrameIndex = 0;
1078
1079  // Early exit for targets which have no callee saved registers and no call
1080  // frame setup/destroy pseudo instructions.
1081  if ((CSRegs == 0 || CSRegs[0] == 0) &&
1082      FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
1083    return;
1084
1085  unsigned MaxCallFrameSize = 0;
1086  bool HasCalls = false;
1087
1088  std::vector<MachineBasicBlock::iterator> FrameSDOps;
1089  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
1090    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
1091      if (I->getOpcode() == FrameSetupOpcode ||
1092          I->getOpcode() == FrameDestroyOpcode) {
1093        assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
1094               " instructions should have a single immediate argument!");
1095        unsigned Size = I->getOperand(0).getImm();
1096        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
1097        HasCalls = true;
1098        FrameSDOps.push_back(I);
1099      }
1100
1101  MachineFrameInfo *FFI = Fn.getFrameInfo();
1102  FFI->setHasCalls(HasCalls);
1103  FFI->setMaxCallFrameSize(MaxCallFrameSize);
1104
1105  for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) {
1106    MachineBasicBlock::iterator I = FrameSDOps[i];
1107    // If call frames are not being included as part of the stack frame,
1108    // and there is no dynamic allocation (therefore referencing frame slots
1109    // off sp), leave the pseudo ops alone. We'll eliminate them later.
1110    if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
1111      RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
1112  }
1113
1114  // Now figure out which *callee saved* registers are modified by the current
1115  // function, thus needing to be saved and restored in the prolog/epilog.
1116  //
1117  const TargetRegisterClass* const *CSRegClasses =
1118    RegInfo->getCalleeSavedRegClasses(&Fn);
1119  std::vector<CalleeSavedInfo> CSI;
1120  for (unsigned i = 0; CSRegs[i]; ++i) {
1121    unsigned Reg = CSRegs[i];
1122    if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
1123        // If the reg is modified, save it!
1124      CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
1125    } else {
1126      for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
1127           *AliasSet; ++AliasSet) {  // Check alias registers too.
1128        if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
1129          CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
1130          break;
1131        }
1132      }
1133    }
1134  }
1135
1136  if (CSI.empty())
1137    return;   // Early exit if no callee saved registers are modified!
1138
1139  unsigned NumFixedSpillSlots;
1140  const std::pair<unsigned,int> *FixedSpillSlots =
1141    TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
1142
1143  // Now that we know which registers need to be saved and restored, allocate
1144  // stack slots for them.
1145  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1146    unsigned Reg = CSI[i].getReg();
1147    const TargetRegisterClass *RC = CSI[i].getRegClass();
1148
1149    // Check to see if this physreg must be spilled to a particular stack slot
1150    // on this target.
1151    const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
1152    while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
1153           FixedSlot->first != Reg)
1154      ++FixedSlot;
1155
1156    int FrameIdx;
1157    if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
1158      // Nope, just spill it anywhere convenient.
1159      unsigned Align = RC->getAlignment();
1160      unsigned StackAlign = TFI->getStackAlignment();
1161      // We may not be able to sastify the desired alignment specification of
1162      // the TargetRegisterClass if the stack alignment is smaller.
1163      // Use the min.
1164      Align = std::min(Align, StackAlign);
1165      FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
1166      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
1167      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
1168    } else {
1169      // Spill it to the stack where we must.
1170      FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
1171    }
1172    CSI[i].setFrameIdx(FrameIdx);
1173  }
1174
1175  FFI->setCalleeSavedInfo(CSI);
1176}
1177
1178/// insertCSRSpillsAndRestores - Insert spill and restore code for
1179/// callee saved registers used in the function, handling shrink wrapping.
1180///
1181void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
1182  // Get callee saved register information.
1183  MachineFrameInfo *FFI = Fn.getFrameInfo();
1184  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
1185
1186  // Early exit if no callee saved registers are modified!
1187  if (CSI.empty())
1188    return;
1189
1190  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
1191  MachineBasicBlock::iterator I;
1192
1193  DEBUG(if (ShrinkWrapThisFunction && ShrinkWrapDebugging >= Details)
1194          DOUT << "Inserting CSR spills/restores in function "
1195               << Fn.getFunction()->getName() << "\n");
1196
1197  if (! ShrinkWrapThisFunction) {
1198    // Spill using target interface.
1199    I = EntryBlock->begin();
1200    if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
1201      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1202        // Add the callee-saved register as live-in.
1203        // It's killed at the spill.
1204        EntryBlock->addLiveIn(CSI[i].getReg());
1205
1206        // Insert the spill to the stack frame.
1207        TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
1208                                CSI[i].getFrameIdx(), CSI[i].getRegClass());
1209      }
1210    }
1211
1212    // Restore using target interface.
1213    for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
1214      MachineBasicBlock* MBB = ReturnBlocks[ri];
1215      I = MBB->end(); --I;
1216
1217      // Skip over all terminator instructions, which are part of the return
1218      // sequence.
1219      MachineBasicBlock::iterator I2 = I;
1220      while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
1221        I = I2;
1222
1223      bool AtStart = I == MBB->begin();
1224      MachineBasicBlock::iterator BeforeI = I;
1225      if (!AtStart)
1226        --BeforeI;
1227
1228      // Restore all registers immediately before the return and any
1229      // terminators that preceed it.
1230      if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
1231        for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1232          TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
1233                                   CSI[i].getFrameIdx(),
1234                                   CSI[i].getRegClass());
1235          assert(I != MBB->begin() &&
1236                 "loadRegFromStackSlot didn't insert any code!");
1237          // Insert in reverse order.  loadRegFromStackSlot can insert
1238          // multiple instructions.
1239          if (AtStart)
1240            I = MBB->begin();
1241          else {
1242            I = BeforeI;
1243            ++I;
1244          }
1245        }
1246      }
1247    }
1248    return;
1249  }
1250
1251  // Insert spills.
1252  std::vector<CalleeSavedInfo> blockCSI;
1253  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
1254         BE = CSRSave.end(); BI != BE; ++BI) {
1255    MachineBasicBlock* MBB = BI->first;
1256    CSRegSet save = BI->second;
1257
1258    if (save.empty())
1259      continue;
1260
1261    DEBUG(if (ShrinkWrapDebugging >= Details)
1262            DOUT << "Spilling " << stringifyCSRegSet(save)
1263                 << " in " << getBasicBlockName(MBB) << "\n");
1264
1265    blockCSI.clear();
1266    for (CSRegSet::iterator RI = save.begin(),
1267           RE = save.end(); RI != RE; ++RI) {
1268      blockCSI.push_back(CSI[*RI]);
1269    }
1270    assert(blockCSI.size() > 0 &&
1271           "Could not collect callee saved register info");
1272
1273    I = MBB->begin();
1274
1275    // When shrink wrapping, use stack slot stores/loads.
1276    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
1277      // Add the callee-saved register as live-in.
1278      // It's killed at the spill.
1279      MBB->addLiveIn(blockCSI[i].getReg());
1280
1281      // Insert the spill to the stack frame.
1282      TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
1283                              true,
1284                              blockCSI[i].getFrameIdx(),
1285                              blockCSI[i].getRegClass());
1286    }
1287  }
1288
1289  DEBUG(if (ShrinkWrapDebugging >= Details)
1290          DOUT << "------------------------------"
1291               << "-----------------------------\n");
1292
1293  for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
1294         BE = CSRRestore.end(); BI != BE; ++BI) {
1295    MachineBasicBlock* MBB = BI->first;
1296    CSRegSet restore = BI->second;
1297
1298    if (restore.empty())
1299      continue;
1300
1301    DEBUG(if (ShrinkWrapDebugging >= Details)
1302            DOUT << "Restoring " << stringifyCSRegSet(restore)
1303                 << " in " << getBasicBlockName(MBB) << "\n");
1304
1305    blockCSI.clear();
1306    for (CSRegSet::iterator RI = restore.begin(),
1307           RE = restore.end(); RI != RE; ++RI) {
1308      blockCSI.push_back(CSI[*RI]);
1309    }
1310    assert(blockCSI.size() > 0 &&
1311           "Could not find callee saved register info");
1312
1313    // If MBB is empty and needs restores, insert at the _beginning_.
1314    if (MBB->empty()) {
1315      I = MBB->begin();
1316    } else {
1317      I = MBB->end();
1318      --I;
1319
1320      // Skip over all terminator instructions, which are part of the
1321      // return sequence.
1322      if (! I->getDesc().isTerminator()) {
1323        ++I;
1324      } else {
1325        MachineBasicBlock::iterator I2 = I;
1326        while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
1327          I = I2;
1328      }
1329    }
1330
1331    bool AtStart = I == MBB->begin();
1332    MachineBasicBlock::iterator BeforeI = I;
1333    if (!AtStart)
1334      --BeforeI;
1335
1336    // Restore all registers immediately before the return and any
1337    // terminators that preceed it.
1338    for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
1339      TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
1340                               blockCSI[i].getFrameIdx(),
1341                               blockCSI[i].getRegClass());
1342      assert(I != MBB->begin() &&
1343             "loadRegFromStackSlot didn't insert any code!");
1344      // Insert in reverse order.  loadRegFromStackSlot can insert
1345      // multiple instructions.
1346      if (AtStart)
1347        I = MBB->begin();
1348      else {
1349        I = BeforeI;
1350        ++I;
1351      }
1352    }
1353  }
1354
1355  DEBUG(if (ShrinkWrapDebugging >= Details)
1356          DOUT << "------------------------------"
1357               << "-----------------------------\n");
1358}
1359
1360/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
1361static inline void
1362AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
1363                  bool StackGrowsDown, int64_t &Offset,
1364                  unsigned &MaxAlign) {
1365  // If stack grows down, we need to add size of find the lowest address of the
1366  // object.
1367  if (StackGrowsDown)
1368    Offset += FFI->getObjectSize(FrameIdx);
1369
1370  unsigned Align = FFI->getObjectAlignment(FrameIdx);
1371
1372  // If the alignment of this object is greater than that of the stack, then
1373  // increase the stack alignment to match.
1374  MaxAlign = std::max(MaxAlign, Align);
1375
1376  // Adjust to alignment boundary.
1377  Offset = (Offset + Align - 1) / Align * Align;
1378
1379  if (StackGrowsDown) {
1380    FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
1381  } else {
1382    FFI->setObjectOffset(FrameIdx, Offset);
1383    Offset += FFI->getObjectSize(FrameIdx);
1384  }
1385}
1386
1387/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
1388/// abstract stack objects.
1389///
1390void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
1391  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
1392
1393  bool StackGrowsDown =
1394    TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
1395
1396  // Loop over all of the stack objects, assigning sequential addresses...
1397  MachineFrameInfo *FFI = Fn.getFrameInfo();
1398
1399  unsigned MaxAlign = FFI->getMaxAlignment();
1400
1401  // Start at the beginning of the local area.
1402  // The Offset is the distance from the stack top in the direction
1403  // of stack growth -- so it's always nonnegative.
1404  int64_t Offset = TFI.getOffsetOfLocalArea();
1405  if (StackGrowsDown)
1406    Offset = -Offset;
1407  assert(Offset >= 0
1408         && "Local area offset should be in direction of stack growth");
1409
1410  // If there are fixed sized objects that are preallocated in the local area,
1411  // non-fixed objects can't be allocated right at the start of local area.
1412  // We currently don't support filling in holes in between fixed sized
1413  // objects, so we adjust 'Offset' to point to the end of last fixed sized
1414  // preallocated object.
1415  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
1416    int64_t FixedOff;
1417    if (StackGrowsDown) {
1418      // The maximum distance from the stack pointer is at lower address of
1419      // the object -- which is given by offset. For down growing stack
1420      // the offset is negative, so we negate the offset to get the distance.
1421      FixedOff = -FFI->getObjectOffset(i);
1422    } else {
1423      // The maximum distance from the start pointer is at the upper
1424      // address of the object.
1425      FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
1426    }
1427    if (FixedOff > Offset) Offset = FixedOff;
1428  }
1429
1430  // First assign frame offsets to stack objects that are used to spill
1431  // callee saved registers.
1432  if (StackGrowsDown) {
1433    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
1434      // If stack grows down, we need to add size of find the lowest
1435      // address of the object.
1436      Offset += FFI->getObjectSize(i);
1437
1438      unsigned Align = FFI->getObjectAlignment(i);
1439      // If the alignment of this object is greater than that of the stack,
1440      // then increase the stack alignment to match.
1441      MaxAlign = std::max(MaxAlign, Align);
1442      // Adjust to alignment boundary
1443      Offset = (Offset+Align-1)/Align*Align;
1444
1445      FFI->setObjectOffset(i, -Offset);        // Set the computed offset
1446    }
1447  } else {
1448    int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
1449    for (int i = MaxCSFI; i >= MinCSFI ; --i) {
1450      unsigned Align = FFI->getObjectAlignment(i);
1451      // If the alignment of this object is greater than that of the stack,
1452      // then increase the stack alignment to match.
1453      MaxAlign = std::max(MaxAlign, Align);
1454      // Adjust to alignment boundary
1455      Offset = (Offset+Align-1)/Align*Align;
1456
1457      FFI->setObjectOffset(i, Offset);
1458      Offset += FFI->getObjectSize(i);
1459    }
1460  }
1461
1462  // Make sure the special register scavenging spill slot is closest to the
1463  // frame pointer if a frame pointer is required.
1464  const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
1465  if (RS && RegInfo->hasFP(Fn)) {
1466    int SFI = RS->getScavengingFrameIndex();
1467    if (SFI >= 0)
1468      AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
1469  }
1470
1471  // Make sure that the stack protector comes before the local variables on the
1472  // stack.
1473  if (FFI->getStackProtectorIndex() >= 0)
1474    AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown,
1475                      Offset, MaxAlign);
1476
1477  // Then assign frame offsets to stack objects that are not used to spill
1478  // callee saved registers.
1479  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
1480    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
1481      continue;
1482    if (RS && (int)i == RS->getScavengingFrameIndex())
1483      continue;
1484    if (FFI->isDeadObjectIndex(i))
1485      continue;
1486    if (FFI->getStackProtectorIndex() == (int)i)
1487      continue;
1488
1489    AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign);
1490  }
1491
1492  // Make sure the special register scavenging spill slot is closest to the
1493  // stack pointer.
1494  if (RS && !RegInfo->hasFP(Fn)) {
1495    int SFI = RS->getScavengingFrameIndex();
1496    if (SFI >= 0)
1497      AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
1498  }
1499
1500  // Round up the size to a multiple of the alignment, but only if there are
1501  // calls or alloca's in the function.  This ensures that any calls to
1502  // subroutines have their stack frames suitable aligned.
1503  // Also do this if we need runtime alignment of the stack.  In this case
1504  // offsets will be relative to SP not FP; round up the stack size so this
1505  // works.
1506  if (!RegInfo->targetHandlesStackFrameRounding() &&
1507      (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
1508       (RegInfo->needsStackRealignment(Fn) &&
1509        FFI->getObjectIndexEnd() != 0))) {
1510    // If we have reserved argument space for call sites in the function
1511    // immediately on entry to the current function, count it as part of the
1512    // overall stack size.
1513    if (RegInfo->hasReservedCallFrame(Fn))
1514      Offset += FFI->getMaxCallFrameSize();
1515
1516    unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1;
1517    Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
1518  }
1519
1520  // Update frame info to pretend that this is part of the stack...
1521  FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea());
1522
1523  // Remember the required stack alignment in case targets need it to perform
1524  // dynamic stack alignment.
1525  FFI->setMaxAlignment(MaxAlign);
1526}
1527
1528
1529/// insertPrologEpilogCode - Scan the function for modified callee saved
1530/// registers, insert spill code for these callee saved registers, then add
1531/// prolog and epilog code to the function.
1532///
1533void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
1534  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
1535
1536  // Add prologue to the function...
1537  TRI->emitPrologue(Fn);
1538
1539  // Add epilogue to restore the callee-save registers in each exiting block
1540  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
1541    // If last instruction is a return instruction, add an epilogue
1542    if (!I->empty() && I->back().getDesc().isReturn())
1543      TRI->emitEpilogue(Fn, *I);
1544  }
1545}
1546
1547
1548/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
1549/// register references and actual offsets.
1550///
1551void PEI::replaceFrameIndices(MachineFunction &Fn) {
1552  if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
1553
1554  const TargetMachine &TM = Fn.getTarget();
1555  assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
1556  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
1557  const TargetFrameInfo *TFI = TM.getFrameInfo();
1558  bool StackGrowsDown =
1559    TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
1560  int FrameSetupOpcode   = TRI.getCallFrameSetupOpcode();
1561  int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
1562
1563  for (MachineFunction::iterator BB = Fn.begin(),
1564         E = Fn.end(); BB != E; ++BB) {
1565    int SPAdj = 0;  // SP offset due to call frame setup / destroy.
1566    if (RS) RS->enterBasicBlock(BB);
1567
1568    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
1569      if (I->getOpcode() == TargetInstrInfo::DECLARE) {
1570        // Ignore it.
1571        ++I;
1572        continue;
1573      }
1574
1575      if (I->getOpcode() == FrameSetupOpcode ||
1576          I->getOpcode() == FrameDestroyOpcode) {
1577        // Remember how much SP has been adjusted to create the call
1578        // frame.
1579        int Size = I->getOperand(0).getImm();
1580
1581        if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
1582            (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
1583          Size = -Size;
1584
1585        SPAdj += Size;
1586
1587        MachineBasicBlock::iterator PrevI = BB->end();
1588        if (I != BB->begin()) PrevI = prior(I);
1589        TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
1590
1591        // Visit the instructions created by eliminateCallFramePseudoInstr().
1592        if (PrevI == BB->end())
1593          I = BB->begin();     // The replaced instr was the first in the block.
1594        else
1595          I = next(PrevI);
1596        continue;
1597      }
1598
1599      MachineInstr *MI = I;
1600      bool DoIncr = true;
1601      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
1602        if (MI->getOperand(i).isFI()) {
1603          // Some instructions (e.g. inline asm instructions) can have
1604          // multiple frame indices and/or cause eliminateFrameIndex
1605          // to insert more than one instruction. We need the register
1606          // scavenger to go through all of these instructions so that
1607          // it can update its register information. We keep the
1608          // iterator at the point before insertion so that we can
1609          // revisit them in full.
1610          bool AtBeginning = (I == BB->begin());
1611          if (!AtBeginning) --I;
1612
1613          // If this instruction has a FrameIndex operand, we need to
1614          // use that target machine register info object to eliminate
1615          // it.
1616
1617          TRI.eliminateFrameIndex(MI, SPAdj, RS);
1618
1619          // Reset the iterator if we were at the beginning of the BB.
1620          if (AtBeginning) {
1621            I = BB->begin();
1622            DoIncr = false;
1623          }
1624
1625          MI = 0;
1626          break;
1627        }
1628
1629      if (DoIncr && I != BB->end()) ++I;
1630
1631      // Update register states.
1632      if (RS && MI) RS->forward(MI);
1633    }
1634
1635    assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
1636  }
1637}
1638
1639// Debugging methods for shrink wrapping.
1640#ifndef NDEBUG
1641/// findFastExitPath - debugging method used to detect functions
1642/// with at least one path from the entry block to a return block
1643/// directly or which has a very small number of edges.
1644///
1645void PEI::findFastExitPath() {
1646  if (! EntryBlock)
1647    return;
1648  // Fina a path from EntryBlock to any return block that does not branch:
1649  //        Entry
1650  //          |     ...
1651  //          v      |
1652  //         B1<-----+
1653  //          |
1654  //          v
1655  //       Return
1656  for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
1657         SE = EntryBlock->succ_end(); SI != SE; ++SI) {
1658    MachineBasicBlock* SUCC = *SI;
1659
1660    // Assume positive, disprove existence of fast path.
1661#ifndef NDEBUG
1662    HasFastExitPath = true;
1663#endif
1664
1665    // Check the immediate successors.
1666    if (isReturnBlock(SUCC)) {
1667      if (ShrinkWrapDebugging >= BasicInfo)
1668        DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
1669             << "->" << getBasicBlockName(SUCC) << "\n";
1670      break;
1671    }
1672    // Traverse df from SUCC, look for a branch block.
1673    std::string exitPath = getBasicBlockName(SUCC);
1674    for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
1675           BE = df_end(SUCC); BI != BE; ++BI) {
1676      MachineBasicBlock* SBB = *BI;
1677      // Reject paths with branch nodes.
1678      if (SBB->succ_size() > 1) {
1679#ifndef NDEBUG
1680        HasFastExitPath = false;
1681#endif
1682        break;
1683      }
1684      exitPath += "->" + getBasicBlockName(SBB);
1685    }
1686#ifndef NDEBUG
1687    if (HasFastExitPath) {
1688      if (ShrinkWrapDebugging >= BasicInfo)
1689        DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
1690             << "->" << exitPath << "\n";
1691      break;
1692    }
1693#endif
1694  }
1695}
1696
1697/// verifySpillRestorePlacement - check the current spill/restore
1698/// sets for safety. Attempt to find spills without restores or
1699/// restores without spills.
1700/// Spills: walk df from each MBB in spill set ensuring that
1701///         all CSRs spilled at MMBB are restored on all paths
1702///         from MBB to all exit blocks.
1703/// Restores: walk idf from each MBB in restore set ensuring that
1704///           all CSRs restored at MBB are spilled on all paths
1705///           reaching MBB.
1706///
1707void PEI::verifySpillRestorePlacement() {
1708  unsigned numReturnBlocks = 0;
1709  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1710       MBBI != MBBE; ++MBBI) {
1711    MachineBasicBlock* MBB = MBBI;
1712    if (isReturnBlock(MBB) || MBB->succ_size() == 0)
1713      ++numReturnBlocks;
1714  }
1715  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
1716         BE = CSRSave.end(); BI != BE; ++BI) {
1717    MachineBasicBlock* MBB = BI->first;
1718    CSRegSet spilled = BI->second;
1719    CSRegSet restored;
1720
1721    if (spilled.empty())
1722      continue;
1723
1724    DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1725         << stringifyCSRegSet(spilled)
1726         << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
1727         << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1728
1729    if (CSRRestore[MBB].intersects(spilled)) {
1730      restored |= (CSRRestore[MBB] & spilled);
1731    }
1732
1733    // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
1734    // we must find restores for all spills w/no intervening spills on all
1735    // paths from MBB to all return blocks.
1736    for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
1737           BE = df_end(MBB); BI != BE; ++BI) {
1738      MachineBasicBlock* SBB = *BI;
1739      if (SBB == MBB)
1740        continue;
1741      // Stop when we encounter spills of any CSRs spilled at MBB that
1742      // have not yet been seen to be restored.
1743      if (CSRSave[SBB].intersects(spilled) &&
1744          !restored.contains(CSRSave[SBB] & spilled))
1745        break;
1746      // Collect the CSRs spilled at MBB that are restored
1747      // at this DF successor of MBB.
1748      if (CSRRestore[SBB].intersects(spilled))
1749        restored |= (CSRRestore[SBB] & spilled);
1750      // If we are at a retun block, check that the restores
1751      // we have seen so far exhaust the spills at MBB, then
1752      // reset the restores.
1753      if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
1754        if (restored != spilled) {
1755          CSRegSet notRestored = (spilled - restored);
1756          DOUT << MF->getFunction()->getName() << ": "
1757               << stringifyCSRegSet(notRestored)
1758               << " spilled at " << getBasicBlockName(MBB)
1759               << " are never restored on path to return "
1760               << getBasicBlockName(SBB) << "\n";
1761        }
1762        restored.clear();
1763      }
1764    }
1765  }
1766
1767  // Check restore placements.
1768  for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
1769         BE = CSRRestore.end(); BI != BE; ++BI) {
1770    MachineBasicBlock* MBB = BI->first;
1771    CSRegSet restored = BI->second;
1772    CSRegSet spilled;
1773
1774    if (restored.empty())
1775      continue;
1776
1777    DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1778         << stringifyCSRegSet(CSRSave[MBB])
1779         << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
1780         << stringifyCSRegSet(restored) << "\n";
1781
1782    if (CSRSave[MBB].intersects(restored)) {
1783      spilled |= (CSRSave[MBB] & restored);
1784    }
1785    // Walk inverse depth first from MBB to find spills of all
1786    // CSRs restored at MBB:
1787    for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
1788           BE = idf_end(MBB); BI != BE; ++BI) {
1789      MachineBasicBlock* PBB = *BI;
1790      if (PBB == MBB)
1791        continue;
1792      // Stop when we encounter restores of any CSRs restored at MBB that
1793      // have not yet been seen to be spilled.
1794      if (CSRRestore[PBB].intersects(restored) &&
1795          !spilled.contains(CSRRestore[PBB] & restored))
1796        break;
1797      // Collect the CSRs restored at MBB that are spilled
1798      // at this DF predecessor of MBB.
1799      if (CSRSave[PBB].intersects(restored))
1800        spilled |= (CSRSave[PBB] & restored);
1801    }
1802    if (spilled != restored) {
1803      CSRegSet notSpilled = (restored - spilled);
1804      DOUT << MF->getFunction()->getName() << ": "
1805           << stringifyCSRegSet(notSpilled)
1806           << " restored at " << getBasicBlockName(MBB)
1807           << " are never spilled\n";
1808    }
1809  }
1810}
1811
1812// Debugging print methods.
1813std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
1814  std::ostringstream name;
1815  if (MBB) {
1816    if (MBB->getBasicBlock())
1817      name << MBB->getBasicBlock()->getName();
1818    else
1819      name << "_MBB_" << MBB->getNumber();
1820  }
1821  return name.str();
1822}
1823
1824std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
1825  const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
1826  const std::vector<CalleeSavedInfo> CSI =
1827    MF->getFrameInfo()->getCalleeSavedInfo();
1828
1829  std::ostringstream srep;
1830  if (CSI.size() == 0) {
1831    srep << "[]";
1832    return srep.str();
1833  }
1834  srep << "[";
1835  CSRegSet::iterator I = s.begin(), E = s.end();
1836  if (I != E) {
1837    unsigned reg = CSI[*I].getReg();
1838    srep << TRI->getName(reg);
1839    for (++I; I != E; ++I) {
1840      reg = CSI[*I].getReg();
1841      srep << ",";
1842      srep << TRI->getName(reg);
1843    }
1844  }
1845  srep << "]";
1846  return srep.str();
1847}
1848
1849void PEI::dumpSet(const CSRegSet& s) {
1850  DOUT << stringifyCSRegSet(s) << "\n";
1851}
1852
1853void PEI::dumpUsed(MachineBasicBlock* MBB) {
1854  if (MBB) {
1855    DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
1856         << stringifyCSRegSet(CSRUsed[MBB])  << "\n";
1857  }
1858}
1859
1860void PEI::dumpAllUsed() {
1861    for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1862         MBBI != MBBE; ++MBBI) {
1863      MachineBasicBlock* MBB = MBBI;
1864      dumpUsed(MBB);
1865    }
1866}
1867
1868void PEI::dumpSets(MachineBasicBlock* MBB) {
1869  if (MBB) {
1870    DOUT << getBasicBlockName(MBB)           << " | "
1871         << stringifyCSRegSet(CSRUsed[MBB])  << " | "
1872         << stringifyCSRegSet(AnticIn[MBB])  << " | "
1873         << stringifyCSRegSet(AnticOut[MBB]) << " | "
1874         << stringifyCSRegSet(AvailIn[MBB])  << " | "
1875         << stringifyCSRegSet(AvailOut[MBB]) << "\n";
1876  }
1877}
1878
1879void PEI::dumpSets1(MachineBasicBlock* MBB) {
1880  if (MBB) {
1881    DOUT << getBasicBlockName(MBB)             << " | "
1882         << stringifyCSRegSet(CSRUsed[MBB])    << " | "
1883         << stringifyCSRegSet(AnticIn[MBB])    << " | "
1884         << stringifyCSRegSet(AnticOut[MBB])   << " | "
1885         << stringifyCSRegSet(AvailIn[MBB])    << " | "
1886         << stringifyCSRegSet(AvailOut[MBB])   << " | "
1887         << stringifyCSRegSet(CSRSave[MBB])    << " | "
1888         << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1889  }
1890}
1891
1892void PEI::dumpAllSets() {
1893    for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1894         MBBI != MBBE; ++MBBI) {
1895      MachineBasicBlock* MBB = MBBI;
1896      dumpSets1(MBB);
1897    }
1898}
1899
1900void PEI::dumpSRSets() {
1901  for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
1902       MBB != E; ++MBB) {
1903    if (! CSRSave[MBB].empty()) {
1904      DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1905           << stringifyCSRegSet(CSRSave[MBB]);
1906      if (CSRRestore[MBB].empty())
1907        DOUT << "\n";
1908    }
1909    if (! CSRRestore[MBB].empty()) {
1910      if (! CSRSave[MBB].empty())
1911        DOUT << "    ";
1912      DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
1913           << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1914    }
1915  }
1916}
1917#endif
1918