1//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a shrink wrapping variant of prolog/epilog insertion:
11// - Spills and restores of callee-saved registers (CSRs) are placed in the
12//   machine CFG to tightly surround their uses so that execution paths that
13//   do not use CSRs do not pay the spill/restore penalty.
14//
15// - Avoiding placment of spills/restores in loops: if a CSR is used inside a
16//   loop the spills are placed in the loop preheader, and restores are
17//   placed in the loop exit nodes (the successors of loop _exiting_ nodes).
18//
19// - Covering paths without CSR uses:
20//   If a region in a CFG uses CSRs and has multiple entry and/or exit points,
21//   the use info for the CSRs inside the region is propagated outward in the
22//   CFG to ensure validity of the spill/restore placements. This decreases
23//   the effectiveness of shrink wrapping but does not require edge splitting
24//   in the machine CFG.
25//
26// This shrink wrapping implementation uses an iterative analysis to determine
27// which basic blocks require spills and restores for CSRs.
28//
29// This pass uses MachineDominators and MachineLoopInfo. Loop information
30// is used to prevent placement of callee-saved register spills/restores
31// in the bodies of loops.
32//
33//===----------------------------------------------------------------------===//
34
35#define DEBUG_TYPE "shrink-wrap"
36
37#include "PrologEpilogInserter.h"
38#include "llvm/ADT/DenseMap.h"
39#include "llvm/ADT/PostOrderIterator.h"
40#include "llvm/ADT/STLExtras.h"
41#include "llvm/ADT/SparseBitVector.h"
42#include "llvm/ADT/Statistic.h"
43#include "llvm/CodeGen/MachineDominators.h"
44#include "llvm/CodeGen/MachineFrameInfo.h"
45#include "llvm/CodeGen/MachineInstr.h"
46#include "llvm/CodeGen/MachineLoopInfo.h"
47#include "llvm/CodeGen/MachineRegisterInfo.h"
48#include "llvm/Support/CommandLine.h"
49#include "llvm/Support/Compiler.h"
50#include "llvm/Support/Debug.h"
51#include "llvm/Target/TargetMachine.h"
52#include "llvm/Target/TargetRegisterInfo.h"
53#include <sstream>
54
55using namespace llvm;
56
57STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
58
59// Shrink Wrapping:
60static cl::opt<bool>
61ShrinkWrapping("shrink-wrap",
62               cl::desc("Shrink wrap callee-saved register spills/restores"));
63
64// Shrink wrap only the specified function, a debugging aid.
65static cl::opt<std::string>
66ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
67               cl::desc("Shrink wrap the specified function"),
68               cl::value_desc("funcname"),
69               cl::init(""));
70
71// Debugging level for shrink wrapping.
72enum ShrinkWrapDebugLevel {
73  Disabled, BasicInfo, Iterations, Details
74};
75
76static cl::opt<enum ShrinkWrapDebugLevel>
77ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
78  cl::desc("Print shrink wrapping debugging information"),
79  cl::values(
80    clEnumVal(Disabled  , "disable debug output"),
81    clEnumVal(BasicInfo , "print basic DF sets"),
82    clEnumVal(Iterations, "print SR sets for each iteration"),
83    clEnumVal(Details   , "print all DF sets"),
84    clEnumValEnd));
85
86
87void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
88  AU.setPreservesCFG();
89  if (ShrinkWrapping || ShrinkWrapFunc != "") {
90    AU.addRequired<MachineLoopInfo>();
91    AU.addRequired<MachineDominatorTree>();
92  }
93  AU.addPreserved<MachineLoopInfo>();
94  AU.addPreserved<MachineDominatorTree>();
95  AU.addRequired<TargetPassConfig>();
96  MachineFunctionPass::getAnalysisUsage(AU);
97}
98
99//===----------------------------------------------------------------------===//
100//  ShrinkWrapping implementation
101//===----------------------------------------------------------------------===//
102
103// Convienences for dealing with machine loops.
104MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
105  assert(LP && "Machine loop is NULL.");
106  MachineBasicBlock* PHDR = LP->getLoopPreheader();
107  MachineLoop* PLP = LP->getParentLoop();
108  while (PLP) {
109    PHDR = PLP->getLoopPreheader();
110    PLP = PLP->getParentLoop();
111  }
112  return PHDR;
113}
114
115MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
116  if (LP == 0)
117    return 0;
118  MachineLoop* PLP = LP->getParentLoop();
119  while (PLP) {
120    LP = PLP;
121    PLP = PLP->getParentLoop();
122  }
123  return LP;
124}
125
126bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
127  return (MBB && !MBB->empty() && MBB->back().isReturn());
128}
129
130// Initialize shrink wrapping DFA sets, called before iterations.
131void PEI::clearAnticAvailSets() {
132  AnticIn.clear();
133  AnticOut.clear();
134  AvailIn.clear();
135  AvailOut.clear();
136}
137
138// Clear all sets constructed by shrink wrapping.
139void PEI::clearAllSets() {
140  ReturnBlocks.clear();
141  clearAnticAvailSets();
142  UsedCSRegs.clear();
143  CSRUsed.clear();
144  TLLoops.clear();
145  CSRSave.clear();
146  CSRRestore.clear();
147}
148
149// Initialize all shrink wrapping data.
150void PEI::initShrinkWrappingInfo() {
151  clearAllSets();
152  EntryBlock = 0;
153#ifndef NDEBUG
154  HasFastExitPath = false;
155#endif
156  ShrinkWrapThisFunction = ShrinkWrapping;
157  // DEBUG: enable or disable shrink wrapping for the current function
158  // via --shrink-wrap-func=<funcname>.
159#ifndef NDEBUG
160  if (ShrinkWrapFunc != "") {
161    std::string MFName = MF->getName().str();
162    ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
163  }
164#endif
165}
166
167
168/// placeCSRSpillsAndRestores - determine which MBBs of the function
169/// need save, restore code for callee-saved registers by doing a DF analysis
170/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
171/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
172/// is used to ensure that CSR save/restore code is not placed inside loops.
173/// This function computes the maps of MBBs -> CSRs to spill and restore
174/// in CSRSave, CSRRestore.
175///
176/// If shrink wrapping is not being performed, place all spills in
177/// the entry block, all restores in return blocks. In this case,
178/// CSRSave has a single mapping, CSRRestore has mappings for each
179/// return block.
180///
181void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
182
183  DEBUG(MF = &Fn);
184
185  initShrinkWrappingInfo();
186
187  DEBUG(if (ShrinkWrapThisFunction) {
188      dbgs() << "Place CSR spills/restores for "
189             << MF->getName() << "\n";
190    });
191
192  if (calculateSets(Fn))
193    placeSpillsAndRestores(Fn);
194}
195
196/// calcAnticInOut - calculate the anticipated in/out reg sets
197/// for the given MBB by looking forward in the MCFG at MBB's
198/// successors.
199///
200bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
201  bool changed = false;
202
203  // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
204  SmallVector<MachineBasicBlock*, 4> successors;
205  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
206         SE = MBB->succ_end(); SI != SE; ++SI) {
207    MachineBasicBlock* SUCC = *SI;
208    if (SUCC != MBB)
209      successors.push_back(SUCC);
210  }
211
212  unsigned i = 0, e = successors.size();
213  if (i != e) {
214    CSRegSet prevAnticOut = AnticOut[MBB];
215    MachineBasicBlock* SUCC = successors[i];
216
217    AnticOut[MBB] = AnticIn[SUCC];
218    for (++i; i != e; ++i) {
219      SUCC = successors[i];
220      AnticOut[MBB] &= AnticIn[SUCC];
221    }
222    if (prevAnticOut != AnticOut[MBB])
223      changed = true;
224  }
225
226  // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
227  CSRegSet prevAnticIn = AnticIn[MBB];
228  AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
229  if (prevAnticIn != AnticIn[MBB])
230    changed = true;
231  return changed;
232}
233
234/// calcAvailInOut - calculate the available in/out reg sets
235/// for the given MBB by looking backward in the MCFG at MBB's
236/// predecessors.
237///
238bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
239  bool changed = false;
240
241  // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
242  SmallVector<MachineBasicBlock*, 4> predecessors;
243  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
244         PE = MBB->pred_end(); PI != PE; ++PI) {
245    MachineBasicBlock* PRED = *PI;
246    if (PRED != MBB)
247      predecessors.push_back(PRED);
248  }
249
250  unsigned i = 0, e = predecessors.size();
251  if (i != e) {
252    CSRegSet prevAvailIn = AvailIn[MBB];
253    MachineBasicBlock* PRED = predecessors[i];
254
255    AvailIn[MBB] = AvailOut[PRED];
256    for (++i; i != e; ++i) {
257      PRED = predecessors[i];
258      AvailIn[MBB] &= AvailOut[PRED];
259    }
260    if (prevAvailIn != AvailIn[MBB])
261      changed = true;
262  }
263
264  // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
265  CSRegSet prevAvailOut = AvailOut[MBB];
266  AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
267  if (prevAvailOut != AvailOut[MBB])
268    changed = true;
269  return changed;
270}
271
272/// calculateAnticAvail - build the sets anticipated and available
273/// registers in the MCFG of the current function iteratively,
274/// doing a combined forward and backward analysis.
275///
276void PEI::calculateAnticAvail(MachineFunction &Fn) {
277  // Initialize data flow sets.
278  clearAnticAvailSets();
279
280  // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
281  bool changed = true;
282  unsigned iterations = 0;
283  while (changed) {
284    changed = false;
285    ++iterations;
286    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
287         MBBI != MBBE; ++MBBI) {
288      MachineBasicBlock* MBB = MBBI;
289
290      // Calculate anticipated in, out regs at MBB from
291      // anticipated at successors of MBB.
292      changed |= calcAnticInOut(MBB);
293
294      // Calculate available in, out regs at MBB from
295      // available at predecessors of MBB.
296      changed |= calcAvailInOut(MBB);
297    }
298  }
299
300  DEBUG({
301      if (ShrinkWrapDebugging >= Details) {
302        dbgs()
303          << "-----------------------------------------------------------\n"
304          << " Antic/Avail Sets:\n"
305          << "-----------------------------------------------------------\n"
306          << "iterations = " << iterations << "\n"
307          << "-----------------------------------------------------------\n"
308          << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"
309          << "-----------------------------------------------------------\n";
310
311        for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
312             MBBI != MBBE; ++MBBI) {
313          MachineBasicBlock* MBB = MBBI;
314          dumpSets(MBB);
315        }
316
317        dbgs()
318          << "-----------------------------------------------------------\n";
319      }
320    });
321}
322
323/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
324/// of the loop given by LP and its parent loops. This prevents spills/restores
325/// from being placed in the bodies of loops.
326///
327void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
328  if (! MBB || !LP)
329    return;
330
331  std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
332  for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
333    MachineBasicBlock* LBB = loopBlocks[i];
334    if (LBB == MBB)
335      continue;
336    if (CSRUsed[LBB].contains(CSRUsed[MBB]))
337      continue;
338    CSRUsed[LBB] |= CSRUsed[MBB];
339  }
340}
341
342/// calculateSets - collect the CSRs used in this function, compute
343/// the DF sets that describe the initial minimal regions in the
344/// Machine CFG around which CSR spills and restores must be placed.
345///
346/// Additionally, this function decides if shrink wrapping should
347/// be disabled for the current function, checking the following:
348///  1. the current function has more than 500 MBBs: heuristic limit
349///     on function size to reduce compile time impact of the current
350///     iterative algorithm.
351///  2. all CSRs are used in the entry block.
352///  3. all CSRs are used in all immediate successors of the entry block.
353///  4. all CSRs are used in a subset of blocks, each of which dominates
354///     all return blocks. These blocks, taken as a subgraph of the MCFG,
355///     are equivalent to the entry block since all execution paths pass
356///     through them.
357///
358bool PEI::calculateSets(MachineFunction &Fn) {
359  // Sets used to compute spill, restore placement sets.
360  const std::vector<CalleeSavedInfo> CSI =
361    Fn.getFrameInfo()->getCalleeSavedInfo();
362
363  // If no CSRs used, we are done.
364  if (CSI.empty()) {
365    DEBUG(if (ShrinkWrapThisFunction)
366            dbgs() << "DISABLED: " << Fn.getName()
367                   << ": uses no callee-saved registers\n");
368    return false;
369  }
370
371  // Save refs to entry and return blocks.
372  EntryBlock = Fn.begin();
373  for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
374       MBB != E; ++MBB)
375    if (isReturnBlock(MBB))
376      ReturnBlocks.push_back(MBB);
377
378  // Determine if this function has fast exit paths.
379  DEBUG(if (ShrinkWrapThisFunction)
380          findFastExitPath());
381
382  // Limit shrink wrapping via the current iterative bit vector
383  // implementation to functions with <= 500 MBBs.
384  if (Fn.size() > 500) {
385    DEBUG(if (ShrinkWrapThisFunction)
386            dbgs() << "DISABLED: " << Fn.getName()
387                   << ": too large (" << Fn.size() << " MBBs)\n");
388    ShrinkWrapThisFunction = false;
389  }
390
391  // Return now if not shrink wrapping.
392  if (! ShrinkWrapThisFunction)
393    return false;
394
395  // Collect set of used CSRs.
396  for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
397    UsedCSRegs.set(inx);
398  }
399
400  // Walk instructions in all MBBs, create CSRUsed[] sets, choose
401  // whether or not to shrink wrap this function.
402  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
403  MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
404  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
405
406  bool allCSRUsesInEntryBlock = true;
407  for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
408       MBBI != MBBE; ++MBBI) {
409    MachineBasicBlock* MBB = MBBI;
410    for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
411      for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
412        unsigned Reg = CSI[inx].getReg();
413        // If instruction I reads or modifies Reg, add it to UsedCSRegs,
414        // CSRUsed map for the current block.
415        for (unsigned opInx = 0, opEnd = I->getNumOperands();
416             opInx != opEnd; ++opInx) {
417          const MachineOperand &MO = I->getOperand(opInx);
418          if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
419            continue;
420          unsigned MOReg = MO.getReg();
421          if (!MOReg)
422            continue;
423          if (MOReg == Reg ||
424              (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
425               TargetRegisterInfo::isPhysicalRegister(Reg) &&
426               TRI->isSubRegister(Reg, MOReg))) {
427            // CSR Reg is defined/used in block MBB.
428            CSRUsed[MBB].set(inx);
429            // Check for uses in EntryBlock.
430            if (MBB != EntryBlock)
431              allCSRUsesInEntryBlock = false;
432          }
433        }
434      }
435    }
436
437    if (CSRUsed[MBB].empty())
438      continue;
439
440    // Propagate CSRUsed[MBB] in loops
441    if (MachineLoop* LP = LI.getLoopFor(MBB)) {
442      // Add top level loop to work list.
443      MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
444      MachineLoop* PLP = getTopLevelLoopParent(LP);
445
446      if (! HDR) {
447        HDR = PLP->getHeader();
448        assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
449        MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
450        HDR = *PI;
451      }
452      TLLoops[HDR] = PLP;
453
454      // Push uses from inside loop to its parent loops,
455      // or to all other MBBs in its loop.
456      if (LP->getLoopDepth() > 1) {
457        for (MachineLoop* PLP = LP->getParentLoop(); PLP;
458             PLP = PLP->getParentLoop()) {
459          propagateUsesAroundLoop(MBB, PLP);
460        }
461      } else {
462        propagateUsesAroundLoop(MBB, LP);
463      }
464    }
465  }
466
467  if (allCSRUsesInEntryBlock) {
468    DEBUG(dbgs() << "DISABLED: " << Fn.getName()
469                 << ": all CSRs used in EntryBlock\n");
470    ShrinkWrapThisFunction = false;
471  } else {
472    bool allCSRsUsedInEntryFanout = true;
473    for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
474           SE = EntryBlock->succ_end(); SI != SE; ++SI) {
475      MachineBasicBlock* SUCC = *SI;
476      if (CSRUsed[SUCC] != UsedCSRegs)
477        allCSRsUsedInEntryFanout = false;
478    }
479    if (allCSRsUsedInEntryFanout) {
480      DEBUG(dbgs() << "DISABLED: " << Fn.getName()
481                   << ": all CSRs used in imm successors of EntryBlock\n");
482      ShrinkWrapThisFunction = false;
483    }
484  }
485
486  if (ShrinkWrapThisFunction) {
487    // Check if MBB uses CSRs and dominates all exit nodes.
488    // Such nodes are equiv. to the entry node w.r.t.
489    // CSR uses: every path through the function must
490    // pass through this node. If each CSR is used at least
491    // once by these nodes, shrink wrapping is disabled.
492    CSRegSet CSRUsedInChokePoints;
493    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
494         MBBI != MBBE; ++MBBI) {
495      MachineBasicBlock* MBB = MBBI;
496      if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
497        continue;
498      bool dominatesExitNodes = true;
499      for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
500        if (! DT.dominates(MBB, ReturnBlocks[ri])) {
501          dominatesExitNodes = false;
502          break;
503        }
504      if (dominatesExitNodes) {
505        CSRUsedInChokePoints |= CSRUsed[MBB];
506        if (CSRUsedInChokePoints == UsedCSRegs) {
507          DEBUG(dbgs() << "DISABLED: " << Fn.getName()
508                       << ": all CSRs used in choke point(s) at "
509                       << getBasicBlockName(MBB) << "\n");
510          ShrinkWrapThisFunction = false;
511          break;
512        }
513      }
514    }
515  }
516
517  // Return now if we have decided not to apply shrink wrapping
518  // to the current function.
519  if (! ShrinkWrapThisFunction)
520    return false;
521
522  DEBUG({
523      dbgs() << "ENABLED: " << Fn.getName();
524      if (HasFastExitPath)
525        dbgs() << " (fast exit path)";
526      dbgs() << "\n";
527      if (ShrinkWrapDebugging >= BasicInfo) {
528        dbgs() << "------------------------------"
529             << "-----------------------------\n";
530        dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
531        if (ShrinkWrapDebugging >= Details) {
532          dbgs() << "------------------------------"
533               << "-----------------------------\n";
534          dumpAllUsed();
535        }
536      }
537    });
538
539  // Build initial DF sets to determine minimal regions in the
540  // Machine CFG around which CSRs must be spilled and restored.
541  calculateAnticAvail(Fn);
542
543  return true;
544}
545
546/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
547/// multi-entry, multi-exit (MEME) regions so spill and restore
548/// placement will not break code that enters or leaves a
549/// shrink-wrapped region by inducing spills with no matching
550/// restores or restores with no matching spills. A MEME region
551/// is a subgraph of the MCFG with multiple entry edges, multiple
552/// exit edges, or both. This code propagates use information
553/// through the MCFG until all paths requiring spills and restores
554/// _outside_ the computed minimal placement regions have been covered.
555///
556bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
557                               SmallVectorImpl<MachineBasicBlock *> &blks) {
558  if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
559    bool processThisBlock = false;
560    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
561           SE = MBB->succ_end(); SI != SE; ++SI) {
562      MachineBasicBlock* SUCC = *SI;
563      if (SUCC->pred_size() > 1) {
564        processThisBlock = true;
565        break;
566      }
567    }
568    if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
569      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
570             PE = MBB->pred_end(); PI != PE; ++PI) {
571        MachineBasicBlock* PRED = *PI;
572        if (PRED->succ_size() > 1) {
573          processThisBlock = true;
574          break;
575        }
576      }
577    }
578    if (! processThisBlock)
579      return false;
580  }
581
582  CSRegSet prop;
583  if (!CSRSave[MBB].empty())
584    prop = CSRSave[MBB];
585  else if (!CSRRestore[MBB].empty())
586    prop = CSRRestore[MBB];
587  else
588    prop = CSRUsed[MBB];
589  if (prop.empty())
590    return false;
591
592  // Propagate selected bits to successors, predecessors of MBB.
593  bool addedUses = false;
594  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
595         SE = MBB->succ_end(); SI != SE; ++SI) {
596    MachineBasicBlock* SUCC = *SI;
597    // Self-loop
598    if (SUCC == MBB)
599      continue;
600    if (! CSRUsed[SUCC].contains(prop)) {
601      CSRUsed[SUCC] |= prop;
602      addedUses = true;
603      blks.push_back(SUCC);
604      DEBUG(if (ShrinkWrapDebugging >= Iterations)
605              dbgs() << getBasicBlockName(MBB)
606                   << "(" << stringifyCSRegSet(prop) << ")->"
607                   << "successor " << getBasicBlockName(SUCC) << "\n");
608    }
609  }
610  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
611         PE = MBB->pred_end(); PI != PE; ++PI) {
612    MachineBasicBlock* PRED = *PI;
613    // Self-loop
614    if (PRED == MBB)
615      continue;
616    if (! CSRUsed[PRED].contains(prop)) {
617      CSRUsed[PRED] |= prop;
618      addedUses = true;
619      blks.push_back(PRED);
620      DEBUG(if (ShrinkWrapDebugging >= Iterations)
621              dbgs() << getBasicBlockName(MBB)
622                   << "(" << stringifyCSRegSet(prop) << ")->"
623                   << "predecessor " << getBasicBlockName(PRED) << "\n");
624    }
625  }
626  return addedUses;
627}
628
629/// addUsesForTopLevelLoops - add uses for CSRs used inside top
630/// level loops to the exit blocks of those loops.
631///
632bool PEI::addUsesForTopLevelLoops(SmallVectorImpl<MachineBasicBlock *> &blks) {
633  bool addedUses = false;
634
635  // Place restores for top level loops where needed.
636  for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
637         I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
638    MachineBasicBlock* MBB = I->first;
639    MachineLoop* LP = I->second;
640    MachineBasicBlock* HDR = LP->getHeader();
641    SmallVector<MachineBasicBlock*, 4> exitBlocks;
642    CSRegSet loopSpills;
643
644    loopSpills = CSRSave[MBB];
645    if (CSRSave[MBB].empty()) {
646      loopSpills = CSRUsed[HDR];
647      assert(!loopSpills.empty() && "No CSRs used in loop?");
648    } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
649      continue;
650
651    LP->getExitBlocks(exitBlocks);
652    assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
653    for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
654      MachineBasicBlock* EXB = exitBlocks[i];
655      if (! CSRUsed[EXB].contains(loopSpills)) {
656        CSRUsed[EXB] |= loopSpills;
657        addedUses = true;
658        DEBUG(if (ShrinkWrapDebugging >= Iterations)
659                dbgs() << "LOOP " << getBasicBlockName(MBB)
660                     << "(" << stringifyCSRegSet(loopSpills) << ")->"
661                     << getBasicBlockName(EXB) << "\n");
662        if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
663          blks.push_back(EXB);
664      }
665    }
666  }
667  return addedUses;
668}
669
670/// calcSpillPlacements - determine which CSRs should be spilled
671/// in MBB using AnticIn sets of MBB's predecessors, keeping track
672/// of changes to spilled reg sets. Add MBB to the set of blocks
673/// that need to be processed for propagating use info to cover
674/// multi-entry/exit regions.
675///
676bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
677                              SmallVectorImpl<MachineBasicBlock *> &blks,
678                              CSRegBlockMap &prevSpills) {
679  bool placedSpills = false;
680  // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
681  CSRegSet anticInPreds;
682  SmallVector<MachineBasicBlock*, 4> predecessors;
683  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
684         PE = MBB->pred_end(); PI != PE; ++PI) {
685    MachineBasicBlock* PRED = *PI;
686    if (PRED != MBB)
687      predecessors.push_back(PRED);
688  }
689  unsigned i = 0, e = predecessors.size();
690  if (i != e) {
691    MachineBasicBlock* PRED = predecessors[i];
692    anticInPreds = UsedCSRegs - AnticIn[PRED];
693    for (++i; i != e; ++i) {
694      PRED = predecessors[i];
695      anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
696    }
697  } else {
698    // Handle uses in entry blocks (which have no predecessors).
699    // This is necessary because the DFA formulation assumes the
700    // entry and (multiple) exit nodes cannot have CSR uses, which
701    // is not the case in the real world.
702    anticInPreds = UsedCSRegs;
703  }
704  // Compute spills required at MBB:
705  CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
706
707  if (! CSRSave[MBB].empty()) {
708    if (MBB == EntryBlock) {
709      for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
710        CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
711    } else {
712      // Reset all regs spilled in MBB that are also spilled in EntryBlock.
713      if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
714        CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
715      }
716    }
717  }
718  placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
719  prevSpills[MBB] = CSRSave[MBB];
720  // Remember this block for adding restores to successor
721  // blocks for multi-entry region.
722  if (placedSpills)
723    blks.push_back(MBB);
724
725  DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
726          dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
727               << stringifyCSRegSet(CSRSave[MBB]) << "\n");
728
729  return placedSpills;
730}
731
732/// calcRestorePlacements - determine which CSRs should be restored
733/// in MBB using AvailOut sets of MBB's succcessors, keeping track
734/// of changes to restored reg sets. Add MBB to the set of blocks
735/// that need to be processed for propagating use info to cover
736/// multi-entry/exit regions.
737///
738bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
739                                SmallVectorImpl<MachineBasicBlock *> &blks,
740                                CSRegBlockMap &prevRestores) {
741  bool placedRestores = false;
742  // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
743  CSRegSet availOutSucc;
744  SmallVector<MachineBasicBlock*, 4> successors;
745  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
746         SE = MBB->succ_end(); SI != SE; ++SI) {
747    MachineBasicBlock* SUCC = *SI;
748    if (SUCC != MBB)
749      successors.push_back(SUCC);
750  }
751  unsigned i = 0, e = successors.size();
752  if (i != e) {
753    MachineBasicBlock* SUCC = successors[i];
754    availOutSucc = UsedCSRegs - AvailOut[SUCC];
755    for (++i; i != e; ++i) {
756      SUCC = successors[i];
757      availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
758    }
759  } else {
760    if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
761      // Handle uses in return blocks (which have no successors).
762      // This is necessary because the DFA formulation assumes the
763      // entry and (multiple) exit nodes cannot have CSR uses, which
764      // is not the case in the real world.
765      availOutSucc = UsedCSRegs;
766    }
767  }
768  // Compute restores required at MBB:
769  CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
770
771  // Postprocess restore placements at MBB.
772  // Remove the CSRs that are restored in the return blocks.
773  // Lest this be confusing, note that:
774  // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
775  if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
776    if (! CSRSave[EntryBlock].empty())
777      CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
778  }
779  placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
780  prevRestores[MBB] = CSRRestore[MBB];
781  // Remember this block for adding saves to predecessor
782  // blocks for multi-entry region.
783  if (placedRestores)
784    blks.push_back(MBB);
785
786  DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
787          dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
788               << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
789
790  return placedRestores;
791}
792
793/// placeSpillsAndRestores - place spills and restores of CSRs
794/// used in MBBs in minimal regions that contain the uses.
795///
796void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
797  CSRegBlockMap prevCSRSave;
798  CSRegBlockMap prevCSRRestore;
799  SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
800  bool changed = true;
801  unsigned iterations = 0;
802
803  // Iterate computation of spill and restore placements in the MCFG until:
804  //   1. CSR use info has been fully propagated around the MCFG, and
805  //   2. computation of CSRSave[], CSRRestore[] reach fixed points.
806  while (changed) {
807    changed = false;
808    ++iterations;
809
810    DEBUG(if (ShrinkWrapDebugging >= Iterations)
811            dbgs() << "iter " << iterations
812                 << " --------------------------------------------------\n");
813
814    // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
815    // which determines the placements of spills and restores.
816    // Keep track of changes to spills, restores in each iteration to
817    // minimize the total iterations.
818    bool SRChanged = false;
819    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
820         MBBI != MBBE; ++MBBI) {
821      MachineBasicBlock* MBB = MBBI;
822
823      // Place spills for CSRs in MBB.
824      SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
825
826      // Place restores for CSRs in MBB.
827      SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
828    }
829
830    // Add uses of CSRs used inside loops where needed.
831    changed |= addUsesForTopLevelLoops(cvBlocks);
832
833    // Add uses for CSRs spilled or restored at branch, join points.
834    if (changed || SRChanged) {
835      while (! cvBlocks.empty()) {
836        MachineBasicBlock* MBB = cvBlocks.pop_back_val();
837        changed |= addUsesForMEMERegion(MBB, ncvBlocks);
838      }
839      if (! ncvBlocks.empty()) {
840        cvBlocks = ncvBlocks;
841        ncvBlocks.clear();
842      }
843    }
844
845    if (changed) {
846      calculateAnticAvail(Fn);
847      CSRSave.clear();
848      CSRRestore.clear();
849    }
850  }
851
852  // Check for effectiveness:
853  //  SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
854  //  numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
855  // Gives a measure of how many CSR spills have been moved from EntryBlock
856  // to minimal regions enclosing their uses.
857  CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
858  unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
859  numSRReduced += numSRReducedThisFunc;
860  DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
861      dbgs() << "-----------------------------------------------------------\n";
862      dbgs() << "total iterations = " << iterations << " ( "
863           << Fn.getName()
864           << " " << numSRReducedThisFunc
865           << " " << Fn.size()
866           << " )\n";
867      dbgs() << "-----------------------------------------------------------\n";
868      dumpSRSets();
869      dbgs() << "-----------------------------------------------------------\n";
870      if (numSRReducedThisFunc)
871        verifySpillRestorePlacement();
872    });
873}
874
875// Debugging methods.
876#ifndef NDEBUG
877/// findFastExitPath - debugging method used to detect functions
878/// with at least one path from the entry block to a return block
879/// directly or which has a very small number of edges.
880///
881void PEI::findFastExitPath() {
882  if (! EntryBlock)
883    return;
884  // Fina a path from EntryBlock to any return block that does not branch:
885  //        Entry
886  //          |     ...
887  //          v      |
888  //         B1<-----+
889  //          |
890  //          v
891  //       Return
892  for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
893         SE = EntryBlock->succ_end(); SI != SE; ++SI) {
894    MachineBasicBlock* SUCC = *SI;
895
896    // Assume positive, disprove existence of fast path.
897    HasFastExitPath = true;
898
899    // Check the immediate successors.
900    if (isReturnBlock(SUCC)) {
901      if (ShrinkWrapDebugging >= BasicInfo)
902        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
903             << "->" << getBasicBlockName(SUCC) << "\n";
904      break;
905    }
906    // Traverse df from SUCC, look for a branch block.
907    std::string exitPath = getBasicBlockName(SUCC);
908    for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
909           BE = df_end(SUCC); BI != BE; ++BI) {
910      MachineBasicBlock* SBB = *BI;
911      // Reject paths with branch nodes.
912      if (SBB->succ_size() > 1) {
913        HasFastExitPath = false;
914        break;
915      }
916      exitPath += "->" + getBasicBlockName(SBB);
917    }
918    if (HasFastExitPath) {
919      if (ShrinkWrapDebugging >= BasicInfo)
920        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
921             << "->" << exitPath << "\n";
922      break;
923    }
924  }
925}
926
927/// verifySpillRestorePlacement - check the current spill/restore
928/// sets for safety. Attempt to find spills without restores or
929/// restores without spills.
930/// Spills: walk df from each MBB in spill set ensuring that
931///         all CSRs spilled at MMBB are restored on all paths
932///         from MBB to all exit blocks.
933/// Restores: walk idf from each MBB in restore set ensuring that
934///           all CSRs restored at MBB are spilled on all paths
935///           reaching MBB.
936///
937void PEI::verifySpillRestorePlacement() {
938  unsigned numReturnBlocks = 0;
939  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
940       MBBI != MBBE; ++MBBI) {
941    MachineBasicBlock* MBB = MBBI;
942    if (isReturnBlock(MBB) || MBB->succ_size() == 0)
943      ++numReturnBlocks;
944  }
945  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
946         BE = CSRSave.end(); BI != BE; ++BI) {
947    MachineBasicBlock* MBB = BI->first;
948    CSRegSet spilled = BI->second;
949    CSRegSet restored;
950
951    if (spilled.empty())
952      continue;
953
954    DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
955                 << stringifyCSRegSet(spilled)
956                 << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
957                 << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
958
959    if (CSRRestore[MBB].intersects(spilled)) {
960      restored |= (CSRRestore[MBB] & spilled);
961    }
962
963    // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
964    // we must find restores for all spills w/no intervening spills on all
965    // paths from MBB to all return blocks.
966    for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
967           BE = df_end(MBB); BI != BE; ++BI) {
968      MachineBasicBlock* SBB = *BI;
969      if (SBB == MBB)
970        continue;
971      // Stop when we encounter spills of any CSRs spilled at MBB that
972      // have not yet been seen to be restored.
973      if (CSRSave[SBB].intersects(spilled) &&
974          !restored.contains(CSRSave[SBB] & spilled))
975        break;
976      // Collect the CSRs spilled at MBB that are restored
977      // at this DF successor of MBB.
978      if (CSRRestore[SBB].intersects(spilled))
979        restored |= (CSRRestore[SBB] & spilled);
980      // If we are at a retun block, check that the restores
981      // we have seen so far exhaust the spills at MBB, then
982      // reset the restores.
983      if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
984        if (restored != spilled) {
985          CSRegSet notRestored = (spilled - restored);
986          DEBUG(dbgs() << MF->getName() << ": "
987                       << stringifyCSRegSet(notRestored)
988                       << " spilled at " << getBasicBlockName(MBB)
989                       << " are never restored on path to return "
990                       << getBasicBlockName(SBB) << "\n");
991        }
992        restored.clear();
993      }
994    }
995  }
996
997  // Check restore placements.
998  for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
999         BE = CSRRestore.end(); BI != BE; ++BI) {
1000    MachineBasicBlock* MBB = BI->first;
1001    CSRegSet restored = BI->second;
1002    CSRegSet spilled;
1003
1004    if (restored.empty())
1005      continue;
1006
1007    DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
1008                 << stringifyCSRegSet(CSRSave[MBB])
1009                 << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
1010                 << stringifyCSRegSet(restored) << "\n");
1011
1012    if (CSRSave[MBB].intersects(restored)) {
1013      spilled |= (CSRSave[MBB] & restored);
1014    }
1015    // Walk inverse depth first from MBB to find spills of all
1016    // CSRs restored at MBB:
1017    for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
1018           BE = idf_end(MBB); BI != BE; ++BI) {
1019      MachineBasicBlock* PBB = *BI;
1020      if (PBB == MBB)
1021        continue;
1022      // Stop when we encounter restores of any CSRs restored at MBB that
1023      // have not yet been seen to be spilled.
1024      if (CSRRestore[PBB].intersects(restored) &&
1025          !spilled.contains(CSRRestore[PBB] & restored))
1026        break;
1027      // Collect the CSRs restored at MBB that are spilled
1028      // at this DF predecessor of MBB.
1029      if (CSRSave[PBB].intersects(restored))
1030        spilled |= (CSRSave[PBB] & restored);
1031    }
1032    if (spilled != restored) {
1033      CSRegSet notSpilled = (restored - spilled);
1034      DEBUG(dbgs() << MF->getName() << ": "
1035                   << stringifyCSRegSet(notSpilled)
1036                   << " restored at " << getBasicBlockName(MBB)
1037                   << " are never spilled\n");
1038    }
1039  }
1040}
1041
1042// Debugging print methods.
1043std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
1044  if (!MBB)
1045    return "";
1046
1047  if (MBB->getBasicBlock())
1048    return MBB->getBasicBlock()->getName().str();
1049
1050  std::ostringstream name;
1051  name << "_MBB_" << MBB->getNumber();
1052  return name.str();
1053}
1054
1055std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
1056  const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
1057  const std::vector<CalleeSavedInfo> CSI =
1058    MF->getFrameInfo()->getCalleeSavedInfo();
1059
1060  std::ostringstream srep;
1061  if (CSI.size() == 0) {
1062    srep << "[]";
1063    return srep.str();
1064  }
1065  srep << "[";
1066  CSRegSet::iterator I = s.begin(), E = s.end();
1067  if (I != E) {
1068    unsigned reg = CSI[*I].getReg();
1069    srep << TRI->getName(reg);
1070    for (++I; I != E; ++I) {
1071      reg = CSI[*I].getReg();
1072      srep << ",";
1073      srep << TRI->getName(reg);
1074    }
1075  }
1076  srep << "]";
1077  return srep.str();
1078}
1079
1080void PEI::dumpSet(const CSRegSet& s) {
1081  DEBUG(dbgs() << stringifyCSRegSet(s) << "\n");
1082}
1083
1084void PEI::dumpUsed(MachineBasicBlock* MBB) {
1085  DEBUG({
1086      if (MBB)
1087        dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
1088               << stringifyCSRegSet(CSRUsed[MBB])  << "\n";
1089    });
1090}
1091
1092void PEI::dumpAllUsed() {
1093    for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1094         MBBI != MBBE; ++MBBI) {
1095      MachineBasicBlock* MBB = MBBI;
1096      dumpUsed(MBB);
1097    }
1098}
1099
1100void PEI::dumpSets(MachineBasicBlock* MBB) {
1101  DEBUG({
1102      if (MBB)
1103        dbgs() << getBasicBlockName(MBB)           << " | "
1104               << stringifyCSRegSet(CSRUsed[MBB])  << " | "
1105               << stringifyCSRegSet(AnticIn[MBB])  << " | "
1106               << stringifyCSRegSet(AnticOut[MBB]) << " | "
1107               << stringifyCSRegSet(AvailIn[MBB])  << " | "
1108               << stringifyCSRegSet(AvailOut[MBB]) << "\n";
1109    });
1110}
1111
1112void PEI::dumpSets1(MachineBasicBlock* MBB) {
1113  DEBUG({
1114      if (MBB)
1115        dbgs() << getBasicBlockName(MBB)             << " | "
1116               << stringifyCSRegSet(CSRUsed[MBB])    << " | "
1117               << stringifyCSRegSet(AnticIn[MBB])    << " | "
1118               << stringifyCSRegSet(AnticOut[MBB])   << " | "
1119               << stringifyCSRegSet(AvailIn[MBB])    << " | "
1120               << stringifyCSRegSet(AvailOut[MBB])   << " | "
1121               << stringifyCSRegSet(CSRSave[MBB])    << " | "
1122               << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1123    });
1124}
1125
1126void PEI::dumpAllSets() {
1127    for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1128         MBBI != MBBE; ++MBBI) {
1129      MachineBasicBlock* MBB = MBBI;
1130      dumpSets1(MBB);
1131    }
1132}
1133
1134void PEI::dumpSRSets() {
1135  DEBUG({
1136      for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
1137           MBB != E; ++MBB) {
1138        if (!CSRSave[MBB].empty()) {
1139          dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
1140                 << stringifyCSRegSet(CSRSave[MBB]);
1141          if (CSRRestore[MBB].empty())
1142            dbgs() << '\n';
1143        }
1144
1145        if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty())
1146          dbgs() << "    "
1147                 << "RESTORE[" << getBasicBlockName(MBB) << "] = "
1148                 << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1149      }
1150    });
1151}
1152#endif
1153