MachineLICM.cpp revision 03a9fdf2e755c1cebdb8371d79b591d46daa9463
1//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs loop invariant code motion on machine instructions. We
11// attempt to remove as much code from the body of a loop as possible.
12//
13// This pass does not attempt to throttle itself to limit register pressure.
14// The register allocation phases are expected to perform rematerialization
15// to recover when register pressure is high.
16//
17// This pass is not intended to be a replacement or a complete alternative
18// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
19// constructs that are not exposed before lowering and instruction selection.
20//
21//===----------------------------------------------------------------------===//
22
23#define DEBUG_TYPE "machine-licm"
24#include "llvm/CodeGen/Passes.h"
25#include "llvm/CodeGen/MachineDominators.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineLoopInfo.h"
28#include "llvm/CodeGen/MachineMemOperand.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/CodeGen/PseudoSourceValue.h"
31#include "llvm/Target/TargetLowering.h"
32#include "llvm/Target/TargetRegisterInfo.h"
33#include "llvm/Target/TargetInstrInfo.h"
34#include "llvm/Target/TargetInstrItineraries.h"
35#include "llvm/Target/TargetMachine.h"
36#include "llvm/Analysis/AliasAnalysis.h"
37#include "llvm/ADT/DenseMap.h"
38#include "llvm/ADT/SmallSet.h"
39#include "llvm/ADT/Statistic.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/raw_ostream.h"
43
44using namespace llvm;
45
46static cl::opt<bool>
47TrackRegPressure("rp-aware-machine-licm",
48                 cl::desc("Register pressure aware machine LICM"),
49                 cl::init(false), cl::Hidden);
50
51STATISTIC(NumHoisted,
52          "Number of machine instructions hoisted out of loops");
53STATISTIC(NumLowRP,
54          "Number of instructions hoisted in low reg pressure situation");
55STATISTIC(NumHighLatency,
56          "Number of high latency instructions hoisted");
57STATISTIC(NumCSEed,
58          "Number of hoisted machine instructions CSEed");
59STATISTIC(NumPostRAHoisted,
60          "Number of machine instructions hoisted out of loops post regalloc");
61
62namespace {
63  class MachineLICM : public MachineFunctionPass {
64    bool PreRegAlloc;
65
66    const TargetMachine   *TM;
67    const TargetInstrInfo *TII;
68    const TargetLowering *TLI;
69    const TargetRegisterInfo *TRI;
70    const MachineFrameInfo *MFI;
71    MachineRegisterInfo *MRI;
72    const InstrItineraryData *InstrItins;
73
74    // Various analyses that we use...
75    AliasAnalysis        *AA;      // Alias analysis info.
76    MachineLoopInfo      *MLI;     // Current MachineLoopInfo
77    MachineDominatorTree *DT;      // Machine dominator tree for the cur loop
78
79    // State that is updated as we process loops
80    bool         Changed;          // True if a loop is changed.
81    bool         FirstInLoop;      // True if it's the first LICM in the loop.
82    MachineLoop *CurLoop;          // The current loop we are working on.
83    MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
84
85    BitVector AllocatableSet;
86
87    // Track 'estimated' register pressure.
88    SmallSet<unsigned, 32> RegSeen;
89    SmallVector<unsigned, 8> RegPressure;
90
91    // Register pressure "limit" per register class. If the pressure
92    // is higher than the limit, then it's considered high.
93    SmallVector<unsigned, 8> RegLimit;
94
95    // Register pressure on path leading from loop preheader to current BB.
96    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
97
98    // For each opcode, keep a list of potential CSE instructions.
99    DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
100
101  public:
102    static char ID; // Pass identification, replacement for typeid
103    MachineLICM() :
104      MachineFunctionPass(ID), PreRegAlloc(true) {}
105
106    explicit MachineLICM(bool PreRA) :
107      MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
108
109    virtual bool runOnMachineFunction(MachineFunction &MF);
110
111    const char *getPassName() const { return "Machine Instruction LICM"; }
112
113    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
114      AU.setPreservesCFG();
115      AU.addRequired<MachineLoopInfo>();
116      AU.addRequired<MachineDominatorTree>();
117      AU.addRequired<AliasAnalysis>();
118      AU.addPreserved<MachineLoopInfo>();
119      AU.addPreserved<MachineDominatorTree>();
120      MachineFunctionPass::getAnalysisUsage(AU);
121    }
122
123    virtual void releaseMemory() {
124      RegSeen.clear();
125      RegPressure.clear();
126      RegLimit.clear();
127      for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
128             CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
129        CI->second.clear();
130      CSEMap.clear();
131    }
132
133  private:
134    /// CandidateInfo - Keep track of information about hoisting candidates.
135    struct CandidateInfo {
136      MachineInstr *MI;
137      unsigned      Def;
138      int           FI;
139      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
140        : MI(mi), Def(def), FI(fi) {}
141    };
142
143    /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
144    /// invariants out to the preheader.
145    void HoistRegionPostRA();
146
147    /// HoistPostRA - When an instruction is found to only use loop invariant
148    /// operands that is safe to hoist, this instruction is called to do the
149    /// dirty work.
150    void HoistPostRA(MachineInstr *MI, unsigned Def);
151
152    /// ProcessMI - Examine the instruction for potentai LICM candidate. Also
153    /// gather register def and frame object update information.
154    void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
155                   SmallSet<int, 32> &StoredFIs,
156                   SmallVector<CandidateInfo, 32> &Candidates);
157
158    /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
159    /// current loop.
160    void AddToLiveIns(unsigned Reg);
161
162    /// IsLICMCandidate - Returns true if the instruction may be a suitable
163    /// candidate for LICM. e.g. If the instruction is a call, then it's
164    /// obviously not safe to hoist it.
165    bool IsLICMCandidate(MachineInstr &I);
166
167    /// IsLoopInvariantInst - Returns true if the instruction is loop
168    /// invariant. I.e., all virtual register operands are defined outside of
169    /// the loop, physical registers aren't accessed (explicitly or implicitly),
170    /// and the instruction is hoistable.
171    ///
172    bool IsLoopInvariantInst(MachineInstr &I);
173
174    /// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
175    /// and an use in the current loop.
176    int ComputeOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg);
177
178    /// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
179    /// if hoisting an instruction of the given cost matrix can cause high
180    /// register pressure.
181    bool IncreaseHighRegPressure(DenseMap<unsigned, int> &Cost);
182
183    /// IsProfitableToHoist - Return true if it is potentially profitable to
184    /// hoist the given loop invariant.
185    bool IsProfitableToHoist(MachineInstr &MI);
186
187    /// HoistRegion - Walk the specified region of the CFG (defined by all
188    /// blocks dominated by the specified block, and that are in the current
189    /// loop) in depth first order w.r.t the DominatorTree. This allows us to
190    /// visit definitions before uses, allowing us to hoist a loop body in one
191    /// pass without iteration.
192    ///
193    void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
194
195    /// InitRegPressure - Find all virtual register references that are liveout
196    /// of the preheader to initialize the starting "register pressure". Note
197    /// this does not count live through (livein but not used) registers.
198    void InitRegPressure(MachineBasicBlock *BB);
199
200    /// UpdateRegPressureBefore / UpdateRegPressureAfter - Update estimate of
201    /// register pressure before and after executing a specifi instruction.
202    void UpdateRegPressureBefore(const MachineInstr *MI);
203    void UpdateRegPressureAfter(const MachineInstr *MI);
204
205    /// isLoadFromConstantMemory - Return true if the given instruction is a
206    /// load from constant memory.
207    bool isLoadFromConstantMemory(MachineInstr *MI);
208
209    /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
210    /// the load itself could be hoisted. Return the unfolded and hoistable
211    /// load, or null if the load couldn't be unfolded or if it wouldn't
212    /// be hoistable.
213    MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
214
215    /// LookForDuplicate - Find an instruction amount PrevMIs that is a
216    /// duplicate of MI. Return this instruction if it's found.
217    const MachineInstr *LookForDuplicate(const MachineInstr *MI,
218                                     std::vector<const MachineInstr*> &PrevMIs);
219
220    /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
221    /// the preheader that compute the same value. If it's found, do a RAU on
222    /// with the definition of the existing instruction rather than hoisting
223    /// the instruction to the preheader.
224    bool EliminateCSE(MachineInstr *MI,
225           DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
226
227    /// Hoist - When an instruction is found to only use loop invariant operands
228    /// that is safe to hoist, this instruction is called to do the dirty work.
229    ///
230    void Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
231
232    /// InitCSEMap - Initialize the CSE map with instructions that are in the
233    /// current loop preheader that may become duplicates of instructions that
234    /// are hoisted out of the loop.
235    void InitCSEMap(MachineBasicBlock *BB);
236
237    /// getCurPreheader - Get the preheader for the current loop, splitting
238    /// a critical edge if needed.
239    MachineBasicBlock *getCurPreheader();
240  };
241} // end anonymous namespace
242
243char MachineLICM::ID = 0;
244INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
245                "Machine Loop Invariant Code Motion", false, false)
246INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
247INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
248INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
249INITIALIZE_PASS_END(MachineLICM, "machinelicm",
250                "Machine Loop Invariant Code Motion", false, false)
251
252FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
253  return new MachineLICM(PreRegAlloc);
254}
255
256/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
257/// loop that has a unique predecessor.
258static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
259  // Check whether this loop even has a unique predecessor.
260  if (!CurLoop->getLoopPredecessor())
261    return false;
262  // Ok, now check to see if any of its outer loops do.
263  for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
264    if (L->getLoopPredecessor())
265      return false;
266  // None of them did, so this is the outermost with a unique predecessor.
267  return true;
268}
269
270bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
271  if (PreRegAlloc)
272    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
273  else
274    DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
275  DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
276
277  Changed = FirstInLoop = false;
278  TM = &MF.getTarget();
279  TII = TM->getInstrInfo();
280  TLI = TM->getTargetLowering();
281  TRI = TM->getRegisterInfo();
282  MFI = MF.getFrameInfo();
283  MRI = &MF.getRegInfo();
284  InstrItins = TM->getInstrItineraryData();
285  AllocatableSet = TRI->getAllocatableSet(MF);
286
287  if (PreRegAlloc) {
288    // Estimate register pressure during pre-regalloc pass.
289    unsigned NumRC = TRI->getNumRegClasses();
290    RegPressure.resize(NumRC);
291    std::fill(RegPressure.begin(), RegPressure.end(), 0);
292    RegLimit.resize(NumRC);
293    for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
294           E = TRI->regclass_end(); I != E; ++I)
295      RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF);
296  }
297
298  // Get our Loop information...
299  MLI = &getAnalysis<MachineLoopInfo>();
300  DT  = &getAnalysis<MachineDominatorTree>();
301  AA  = &getAnalysis<AliasAnalysis>();
302
303  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
304  while (!Worklist.empty()) {
305    CurLoop = Worklist.pop_back_val();
306    CurPreheader = 0;
307
308    // If this is done before regalloc, only visit outer-most preheader-sporting
309    // loops.
310    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
311      Worklist.append(CurLoop->begin(), CurLoop->end());
312      continue;
313    }
314
315    if (!PreRegAlloc)
316      HoistRegionPostRA();
317    else {
318      // CSEMap is initialized for loop header when the first instruction is
319      // being hoisted.
320      MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
321      FirstInLoop = true;
322      HoistRegion(N, true);
323      CSEMap.clear();
324    }
325  }
326
327  return Changed;
328}
329
330/// InstructionStoresToFI - Return true if instruction stores to the
331/// specified frame.
332static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
333  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
334         oe = MI->memoperands_end(); o != oe; ++o) {
335    if (!(*o)->isStore() || !(*o)->getValue())
336      continue;
337    if (const FixedStackPseudoSourceValue *Value =
338        dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
339      if (Value->getFrameIndex() == FI)
340        return true;
341    }
342  }
343  return false;
344}
345
346/// ProcessMI - Examine the instruction for potentai LICM candidate. Also
347/// gather register def and frame object update information.
348void MachineLICM::ProcessMI(MachineInstr *MI,
349                            unsigned *PhysRegDefs,
350                            SmallSet<int, 32> &StoredFIs,
351                            SmallVector<CandidateInfo, 32> &Candidates) {
352  bool RuledOut = false;
353  bool HasNonInvariantUse = false;
354  unsigned Def = 0;
355  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
356    const MachineOperand &MO = MI->getOperand(i);
357    if (MO.isFI()) {
358      // Remember if the instruction stores to the frame index.
359      int FI = MO.getIndex();
360      if (!StoredFIs.count(FI) &&
361          MFI->isSpillSlotObjectIndex(FI) &&
362          InstructionStoresToFI(MI, FI))
363        StoredFIs.insert(FI);
364      HasNonInvariantUse = true;
365      continue;
366    }
367
368    if (!MO.isReg())
369      continue;
370    unsigned Reg = MO.getReg();
371    if (!Reg)
372      continue;
373    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
374           "Not expecting virtual register!");
375
376    if (!MO.isDef()) {
377      if (Reg && PhysRegDefs[Reg])
378        // If it's using a non-loop-invariant register, then it's obviously not
379        // safe to hoist.
380        HasNonInvariantUse = true;
381      continue;
382    }
383
384    if (MO.isImplicit()) {
385      ++PhysRegDefs[Reg];
386      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
387        ++PhysRegDefs[*AS];
388      if (!MO.isDead())
389        // Non-dead implicit def? This cannot be hoisted.
390        RuledOut = true;
391      // No need to check if a dead implicit def is also defined by
392      // another instruction.
393      continue;
394    }
395
396    // FIXME: For now, avoid instructions with multiple defs, unless
397    // it's a dead implicit def.
398    if (Def)
399      RuledOut = true;
400    else
401      Def = Reg;
402
403    // If we have already seen another instruction that defines the same
404    // register, then this is not safe.
405    if (++PhysRegDefs[Reg] > 1)
406      // MI defined register is seen defined by another instruction in
407      // the loop, it cannot be a LICM candidate.
408      RuledOut = true;
409    for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
410      if (++PhysRegDefs[*AS] > 1)
411        RuledOut = true;
412  }
413
414  // Only consider reloads for now and remats which do not have register
415  // operands. FIXME: Consider unfold load folding instructions.
416  if (Def && !RuledOut) {
417    int FI = INT_MIN;
418    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
419        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
420      Candidates.push_back(CandidateInfo(MI, Def, FI));
421  }
422}
423
424/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
425/// invariants out to the preheader.
426void MachineLICM::HoistRegionPostRA() {
427  unsigned NumRegs = TRI->getNumRegs();
428  unsigned *PhysRegDefs = new unsigned[NumRegs];
429  std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
430
431  SmallVector<CandidateInfo, 32> Candidates;
432  SmallSet<int, 32> StoredFIs;
433
434  // Walk the entire region, count number of defs for each register, and
435  // collect potential LICM candidates.
436  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
437  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
438    MachineBasicBlock *BB = Blocks[i];
439    // Conservatively treat live-in's as an external def.
440    // FIXME: That means a reload that're reused in successor block(s) will not
441    // be LICM'ed.
442    for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
443           E = BB->livein_end(); I != E; ++I) {
444      unsigned Reg = *I;
445      ++PhysRegDefs[Reg];
446      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
447        ++PhysRegDefs[*AS];
448    }
449
450    for (MachineBasicBlock::iterator
451           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
452      MachineInstr *MI = &*MII;
453      ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
454    }
455  }
456
457  // Now evaluate whether the potential candidates qualify.
458  // 1. Check if the candidate defined register is defined by another
459  //    instruction in the loop.
460  // 2. If the candidate is a load from stack slot (always true for now),
461  //    check if the slot is stored anywhere in the loop.
462  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
463    if (Candidates[i].FI != INT_MIN &&
464        StoredFIs.count(Candidates[i].FI))
465      continue;
466
467    if (PhysRegDefs[Candidates[i].Def] == 1) {
468      bool Safe = true;
469      MachineInstr *MI = Candidates[i].MI;
470      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
471        const MachineOperand &MO = MI->getOperand(j);
472        if (!MO.isReg() || MO.isDef() || !MO.getReg())
473          continue;
474        if (PhysRegDefs[MO.getReg()]) {
475          // If it's using a non-loop-invariant register, then it's obviously
476          // not safe to hoist.
477          Safe = false;
478          break;
479        }
480      }
481      if (Safe)
482        HoistPostRA(MI, Candidates[i].Def);
483    }
484  }
485
486  delete[] PhysRegDefs;
487}
488
489/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
490/// loop, and make sure it is not killed by any instructions in the loop.
491void MachineLICM::AddToLiveIns(unsigned Reg) {
492  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
493  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
494    MachineBasicBlock *BB = Blocks[i];
495    if (!BB->isLiveIn(Reg))
496      BB->addLiveIn(Reg);
497    for (MachineBasicBlock::iterator
498           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
499      MachineInstr *MI = &*MII;
500      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
501        MachineOperand &MO = MI->getOperand(i);
502        if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
503        if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
504          MO.setIsKill(false);
505      }
506    }
507  }
508}
509
510/// HoistPostRA - When an instruction is found to only use loop invariant
511/// operands that is safe to hoist, this instruction is called to do the
512/// dirty work.
513void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
514  MachineBasicBlock *Preheader = getCurPreheader();
515  if (!Preheader) return;
516
517  // Now move the instructions to the predecessor, inserting it before any
518  // terminator instructions.
519  DEBUG({
520      dbgs() << "Hoisting " << *MI;
521      if (Preheader->getBasicBlock())
522        dbgs() << " to MachineBasicBlock "
523               << Preheader->getName();
524      if (MI->getParent()->getBasicBlock())
525        dbgs() << " from MachineBasicBlock "
526               << MI->getParent()->getName();
527      dbgs() << "\n";
528    });
529
530  // Splice the instruction to the preheader.
531  MachineBasicBlock *MBB = MI->getParent();
532  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
533
534  // Add register to livein list to all the BBs in the current loop since a
535  // loop invariant must be kept live throughout the whole loop. This is
536  // important to ensure later passes do not scavenge the def register.
537  AddToLiveIns(Def);
538
539  ++NumPostRAHoisted;
540  Changed = true;
541}
542
543/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
544/// dominated by the specified block, and that are in the current loop) in depth
545/// first order w.r.t the DominatorTree. This allows us to visit definitions
546/// before uses, allowing us to hoist a loop body in one pass without iteration.
547///
548void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
549  assert(N != 0 && "Null dominator tree node?");
550  MachineBasicBlock *BB = N->getBlock();
551
552  // If this subregion is not in the top level loop at all, exit.
553  if (!CurLoop->contains(BB)) return;
554
555  MachineBasicBlock *Preheader = getCurPreheader();
556  if (!Preheader)
557    return;
558
559  if (TrackRegPressure) {
560    if (IsHeader) {
561      // Compute registers which are liveout of preheader.
562      RegSeen.clear();
563      BackTrace.clear();
564      InitRegPressure(Preheader);
565    }
566
567    // Remember livein register pressure.
568    BackTrace.push_back(RegPressure);
569  }
570
571  for (MachineBasicBlock::iterator
572         MII = BB->begin(), E = BB->end(); MII != E; ) {
573    MachineBasicBlock::iterator NextMII = MII; ++NextMII;
574    MachineInstr *MI = &*MII;
575
576    if (TrackRegPressure)
577      UpdateRegPressureBefore(MI);
578    Hoist(MI, Preheader);
579    if (TrackRegPressure)
580      UpdateRegPressureAfter(MI);
581
582    MII = NextMII;
583  }
584
585  // Don't hoist things out of a large switch statement.  This often causes
586  // code to be hoisted that wasn't going to be executed, and increases
587  // register pressure in a situation where it's likely to matter.
588  if (BB->succ_size() < 25) {
589    const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
590    for (unsigned I = 0, E = Children.size(); I != E; ++I)
591      HoistRegion(Children[I]);
592  }
593
594  if (TrackRegPressure)
595    BackTrace.pop_back();
596}
597
598/// InitRegPressure - Find all virtual register references that are liveout of
599/// the preheader to initialize the starting "register pressure". Note this
600/// does not count live through (livein but not used) registers.
601void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
602  std::fill(RegPressure.begin(), RegPressure.end(), 0);
603
604  for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
605       MII != E; ++MII) {
606    MachineInstr *MI = &*MII;
607    for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
608      const MachineOperand &MO = MI->getOperand(i);
609      if (!MO.isReg() || MO.isImplicit())
610        continue;
611      unsigned Reg = MO.getReg();
612      if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
613        continue;
614
615      bool isNew = !RegSeen.insert(Reg);
616      const TargetRegisterClass *RC = MRI->getRegClass(Reg);
617      EVT VT = *RC->vt_begin();
618      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
619      if (MO.isDef())
620        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
621      else {
622        if (isNew && !MO.isKill())
623          // Haven't seen this, it must be a livein.
624          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
625        else if (!isNew && MO.isKill())
626          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
627      }
628    }
629  }
630}
631
632/// UpdateRegPressureBefore / UpdateRegPressureAfter - Update estimate of
633/// register pressure before and after executing a specifi instruction.
634void MachineLICM::UpdateRegPressureBefore(const MachineInstr *MI) {
635  bool NoImpact = MI->isImplicitDef() || MI->isPHI();
636
637  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
638    const MachineOperand &MO = MI->getOperand(i);
639    if (!MO.isReg() || MO.isImplicit() || !MO.isUse())
640      continue;
641    unsigned Reg = MO.getReg();
642    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
643      continue;
644
645    bool isNew = !RegSeen.insert(Reg);
646    if (NoImpact)
647      continue;
648
649    if (!isNew && MO.isKill()) {
650      const TargetRegisterClass *RC = MRI->getRegClass(Reg);
651      EVT VT = *RC->vt_begin();
652      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
653      unsigned RCCost = TLI->getRepRegClassCostFor(VT);
654
655      assert(RCCost <= RegPressure[RCId]);
656      RegPressure[RCId] -= RCCost;
657    }
658  }
659}
660
661void MachineLICM::UpdateRegPressureAfter(const MachineInstr *MI) {
662  bool NoImpact = MI->isImplicitDef() || MI->isPHI();
663
664  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
665    const MachineOperand &MO = MI->getOperand(i);
666    if (!MO.isReg() || MO.isImplicit() || !MO.isDef())
667      continue;
668    unsigned Reg = MO.getReg();
669    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
670      continue;
671
672    RegSeen.insert(Reg);
673    if (NoImpact)
674      continue;
675
676    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
677    EVT VT = *RC->vt_begin();
678    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
679    unsigned RCCost = TLI->getRepRegClassCostFor(VT);
680    RegPressure[RCId] += RCCost;
681  }
682}
683
684/// IsLICMCandidate - Returns true if the instruction may be a suitable
685/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
686/// not safe to hoist it.
687bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
688  // Check if it's safe to move the instruction.
689  bool DontMoveAcrossStore = true;
690  if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
691    return false;
692
693  return true;
694}
695
696/// IsLoopInvariantInst - Returns true if the instruction is loop
697/// invariant. I.e., all virtual register operands are defined outside of the
698/// loop, physical registers aren't accessed explicitly, and there are no side
699/// effects that aren't captured by the operands or other flags.
700///
701bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
702  if (!IsLICMCandidate(I))
703    return false;
704
705  // The instruction is loop invariant if all of its operands are.
706  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
707    const MachineOperand &MO = I.getOperand(i);
708
709    if (!MO.isReg())
710      continue;
711
712    unsigned Reg = MO.getReg();
713    if (Reg == 0) continue;
714
715    // Don't hoist an instruction that uses or defines a physical register.
716    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
717      if (MO.isUse()) {
718        // If the physreg has no defs anywhere, it's just an ambient register
719        // and we can freely move its uses. Alternatively, if it's allocatable,
720        // it could get allocated to something with a def during allocation.
721        if (!MRI->def_empty(Reg))
722          return false;
723        if (AllocatableSet.test(Reg))
724          return false;
725        // Check for a def among the register's aliases too.
726        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
727          unsigned AliasReg = *Alias;
728          if (!MRI->def_empty(AliasReg))
729            return false;
730          if (AllocatableSet.test(AliasReg))
731            return false;
732        }
733        // Otherwise it's safe to move.
734        continue;
735      } else if (!MO.isDead()) {
736        // A def that isn't dead. We can't move it.
737        return false;
738      } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
739        // If the reg is live into the loop, we can't hoist an instruction
740        // which would clobber it.
741        return false;
742      }
743    }
744
745    if (!MO.isUse())
746      continue;
747
748    assert(MRI->getVRegDef(Reg) &&
749           "Machine instr not mapped for this vreg?!");
750
751    // If the loop contains the definition of an operand, then the instruction
752    // isn't loop invariant.
753    if (CurLoop->contains(MRI->getVRegDef(Reg)))
754      return false;
755  }
756
757  // If we got this far, the instruction is loop invariant!
758  return true;
759}
760
761
762/// HasPHIUses - Return true if the specified register has any PHI use.
763static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) {
764  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
765         UE = MRI->use_end(); UI != UE; ++UI) {
766    MachineInstr *UseMI = &*UI;
767    if (UseMI->isPHI())
768      return true;
769  }
770  return false;
771}
772
773/// isLoadFromConstantMemory - Return true if the given instruction is a
774/// load from constant memory. Machine LICM will hoist these even if they are
775/// not re-materializable.
776bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
777  if (!MI->getDesc().mayLoad()) return false;
778  if (!MI->hasOneMemOperand()) return false;
779  MachineMemOperand *MMO = *MI->memoperands_begin();
780  if (MMO->isVolatile()) return false;
781  if (!MMO->getValue()) return false;
782  const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue());
783  if (PSV) {
784    MachineFunction &MF = *MI->getParent()->getParent();
785    return PSV->isConstant(MF.getFrameInfo());
786  } else {
787    return AA->pointsToConstantMemory(MMO->getValue());
788  }
789}
790
791/// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
792/// and an use in the current loop.
793int MachineLICM::ComputeOperandLatency(MachineInstr &MI,
794                                       unsigned DefIdx, unsigned Reg) {
795  if (MRI->use_nodbg_empty(Reg))
796    // No use? Return arbitrary large number!
797    return 300;
798
799  int Latency = -1;
800  for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
801         E = MRI->use_nodbg_end(); I != E; ++I) {
802    MachineInstr *UseMI = &*I;
803    if (!CurLoop->contains(UseMI->getParent()))
804      continue;
805    for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
806      const MachineOperand &MO = UseMI->getOperand(i);
807      if (!MO.isReg() || !MO.isUse())
808        continue;
809      unsigned MOReg = MO.getReg();
810      if (MOReg != Reg)
811        continue;
812
813      int UseCycle = TII->getOperandLatency(InstrItins, &MI, DefIdx, UseMI, i);
814      Latency = std::max(Latency, UseCycle);
815    }
816
817    if (Latency != -1)
818      break;
819  }
820
821  if (Latency == -1)
822    Latency = InstrItins->getOperandCycle(MI.getDesc().getSchedClass(), DefIdx);
823
824  return Latency;
825}
826
827/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
828/// if hoisting an instruction of the given cost matrix can cause high
829/// register pressure.
830bool MachineLICM::IncreaseHighRegPressure(DenseMap<unsigned, int> &Cost) {
831  for (unsigned i = BackTrace.size(); i != 0; --i) {
832    bool AnyIncrease = false;
833    SmallVector<unsigned, 8> &RP = BackTrace[i-1];
834    for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
835         CI != CE; ++CI) {
836      if (CI->second <= 0)
837        continue;
838      AnyIncrease = true;
839      unsigned RCId = CI->first;
840      if (RP[RCId] + CI->second >= RegLimit[RCId])
841        return true;
842    }
843
844    if (!AnyIncrease)
845      // Hoisting the instruction doesn't increase register pressure.
846      return false;
847  }
848
849  return false;
850}
851
852/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
853/// the given loop invariant.
854bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
855  if (MI.isImplicitDef())
856    return true;
857
858  // FIXME: For now, only hoist re-materilizable instructions. LICM will
859  // increase register pressure. We want to make sure it doesn't increase
860  // spilling.
861  // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
862  // these tend to help performance in low register pressure situation. The
863  // trade off is it may cause spill in high pressure situation. It will end up
864  // adding a store in the loop preheader. But the reload is no more expensive.
865  // The side benefit is these loads are frequently CSE'ed.
866  if (!TrackRegPressure || MI.getDesc().isAsCheapAsAMove()) {
867    if (!TII->isTriviallyReMaterializable(&MI, AA) &&
868        !isLoadFromConstantMemory(&MI))
869      return false;
870  } else {
871    // In low register pressure situation, we can be more aggressive about
872    // hoisting. Also, favors hoisting long latency instructions even in
873    // moderately high pressure situation.
874    DenseMap<unsigned, int> Cost;
875    for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
876      const MachineOperand &MO = MI.getOperand(i);
877      if (!MO.isReg() || MO.isImplicit())
878        continue;
879      unsigned Reg = MO.getReg();
880      if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
881        continue;
882      if (MO.isDef()) {
883        if (InstrItins && !InstrItins->isEmpty()) {
884          int Cycle = ComputeOperandLatency(MI, i, Reg);
885          if (Cycle > 3) {
886            // FIXME: Target specific high latency limit?
887            ++NumHighLatency;
888            return true;
889          }
890        }
891
892        const TargetRegisterClass *RC = MRI->getRegClass(Reg);
893        EVT VT = *RC->vt_begin();
894        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
895        unsigned RCCost = TLI->getRepRegClassCostFor(VT);
896        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
897        // If the instruction is not register pressure neutrail (or better),
898        // check if hoisting it will cause high register pressure in BB's
899        // leading up to this point.
900        if (CI != Cost.end())
901          CI->second += RCCost;
902        else
903          Cost.insert(std::make_pair(RCId, RCCost));
904      } else if (MO.isKill()) {
905        // Is a virtual register use is a kill, hoisting it out of the loop
906        // may actually reduce register pressure or be register pressure
907        // neutral
908        const TargetRegisterClass *RC = MRI->getRegClass(Reg);
909        EVT VT = *RC->vt_begin();
910        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
911        unsigned RCCost = TLI->getRepRegClassCostFor(VT);
912        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
913        if (CI != Cost.end())
914          CI->second -= RCCost;
915        else
916          Cost.insert(std::make_pair(RCId, -RCCost));
917      }
918    }
919
920    // Visit BBs from preheader to current BB, if hoisting this doesn't cause
921    // high register pressure, then it's safe to proceed.
922    if (!IncreaseHighRegPressure(Cost)) {
923      ++NumLowRP;
924      return true;
925    }
926
927    // High register pressure situation, only hoist if the instruction is going to
928    // be remat'ed.
929    if (!TII->isTriviallyReMaterializable(&MI, AA) &&
930        !isLoadFromConstantMemory(&MI))
931      return false;
932  }
933
934  // If result(s) of this instruction is used by PHIs, then don't hoist it.
935  // The presence of joins makes it difficult for current register allocator
936  // implementation to perform remat.
937  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
938    const MachineOperand &MO = MI.getOperand(i);
939    if (!MO.isReg() || !MO.isDef())
940      continue;
941    if (HasPHIUses(MO.getReg(), MRI))
942      return false;
943  }
944
945  return true;
946}
947
948MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
949  // Don't unfold simple loads.
950  if (MI->getDesc().canFoldAsLoad())
951    return 0;
952
953  // If not, we may be able to unfold a load and hoist that.
954  // First test whether the instruction is loading from an amenable
955  // memory location.
956  if (!isLoadFromConstantMemory(MI))
957    return 0;
958
959  // Next determine the register class for a temporary register.
960  unsigned LoadRegIndex;
961  unsigned NewOpc =
962    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
963                                    /*UnfoldLoad=*/true,
964                                    /*UnfoldStore=*/false,
965                                    &LoadRegIndex);
966  if (NewOpc == 0) return 0;
967  const TargetInstrDesc &TID = TII->get(NewOpc);
968  if (TID.getNumDefs() != 1) return 0;
969  const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
970  // Ok, we're unfolding. Create a temporary register and do the unfold.
971  unsigned Reg = MRI->createVirtualRegister(RC);
972
973  MachineFunction &MF = *MI->getParent()->getParent();
974  SmallVector<MachineInstr *, 2> NewMIs;
975  bool Success =
976    TII->unfoldMemoryOperand(MF, MI, Reg,
977                             /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
978                             NewMIs);
979  (void)Success;
980  assert(Success &&
981         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
982         "succeeded!");
983  assert(NewMIs.size() == 2 &&
984         "Unfolded a load into multiple instructions!");
985  MachineBasicBlock *MBB = MI->getParent();
986  MBB->insert(MI, NewMIs[0]);
987  MBB->insert(MI, NewMIs[1]);
988  // If unfolding produced a load that wasn't loop-invariant or profitable to
989  // hoist, discard the new instructions and bail.
990  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
991    NewMIs[0]->eraseFromParent();
992    NewMIs[1]->eraseFromParent();
993    return 0;
994  }
995  // Otherwise we successfully unfolded a load that we can hoist.
996  MI->eraseFromParent();
997  return NewMIs[0];
998}
999
1000void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
1001  for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
1002    const MachineInstr *MI = &*I;
1003    // FIXME: For now, only hoist re-materilizable instructions. LICM will
1004    // increase register pressure. We want to make sure it doesn't increase
1005    // spilling.
1006    if (TII->isTriviallyReMaterializable(MI, AA)) {
1007      unsigned Opcode = MI->getOpcode();
1008      DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
1009        CI = CSEMap.find(Opcode);
1010      if (CI != CSEMap.end())
1011        CI->second.push_back(MI);
1012      else {
1013        std::vector<const MachineInstr*> CSEMIs;
1014        CSEMIs.push_back(MI);
1015        CSEMap.insert(std::make_pair(Opcode, CSEMIs));
1016      }
1017    }
1018  }
1019}
1020
1021const MachineInstr*
1022MachineLICM::LookForDuplicate(const MachineInstr *MI,
1023                              std::vector<const MachineInstr*> &PrevMIs) {
1024  for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
1025    const MachineInstr *PrevMI = PrevMIs[i];
1026    if (TII->produceSameValue(MI, PrevMI))
1027      return PrevMI;
1028  }
1029  return 0;
1030}
1031
1032bool MachineLICM::EliminateCSE(MachineInstr *MI,
1033          DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
1034  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1035  // the undef property onto uses.
1036  if (CI == CSEMap.end() || MI->isImplicitDef())
1037    return false;
1038
1039  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1040    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1041
1042    // Replace virtual registers defined by MI by their counterparts defined
1043    // by Dup.
1044    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1045      const MachineOperand &MO = MI->getOperand(i);
1046
1047      // Physical registers may not differ here.
1048      assert((!MO.isReg() || MO.getReg() == 0 ||
1049              !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
1050              MO.getReg() == Dup->getOperand(i).getReg()) &&
1051             "Instructions with different phys regs are not identical!");
1052
1053      if (MO.isReg() && MO.isDef() &&
1054          !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
1055        MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
1056        MRI->clearKillFlags(Dup->getOperand(i).getReg());
1057      }
1058    }
1059    MI->eraseFromParent();
1060    ++NumCSEed;
1061    return true;
1062  }
1063  return false;
1064}
1065
1066/// Hoist - When an instruction is found to use only loop invariant operands
1067/// that are safe to hoist, this instruction is called to do the dirty work.
1068///
1069void MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
1070  // First check whether we should hoist this instruction.
1071  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
1072    // If not, try unfolding a hoistable load.
1073    MI = ExtractHoistableLoad(MI);
1074    if (!MI) return;
1075  }
1076
1077  // Now move the instructions to the predecessor, inserting it before any
1078  // terminator instructions.
1079  DEBUG({
1080      dbgs() << "Hoisting " << *MI;
1081      if (Preheader->getBasicBlock())
1082        dbgs() << " to MachineBasicBlock "
1083               << Preheader->getName();
1084      if (MI->getParent()->getBasicBlock())
1085        dbgs() << " from MachineBasicBlock "
1086               << MI->getParent()->getName();
1087      dbgs() << "\n";
1088    });
1089
1090  // If this is the first instruction being hoisted to the preheader,
1091  // initialize the CSE map with potential common expressions.
1092  if (FirstInLoop) {
1093    InitCSEMap(Preheader);
1094    FirstInLoop = false;
1095  }
1096
1097  // Look for opportunity to CSE the hoisted instruction.
1098  unsigned Opcode = MI->getOpcode();
1099  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
1100    CI = CSEMap.find(Opcode);
1101  if (!EliminateCSE(MI, CI)) {
1102    // Otherwise, splice the instruction to the preheader.
1103    Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
1104
1105    // Clear the kill flags of any register this instruction defines,
1106    // since they may need to be live throughout the entire loop
1107    // rather than just live for part of it.
1108    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1109      MachineOperand &MO = MI->getOperand(i);
1110      if (MO.isReg() && MO.isDef() && !MO.isDead())
1111        MRI->clearKillFlags(MO.getReg());
1112    }
1113
1114    // Add to the CSE map.
1115    if (CI != CSEMap.end())
1116      CI->second.push_back(MI);
1117    else {
1118      std::vector<const MachineInstr*> CSEMIs;
1119      CSEMIs.push_back(MI);
1120      CSEMap.insert(std::make_pair(Opcode, CSEMIs));
1121    }
1122  }
1123
1124  ++NumHoisted;
1125  Changed = true;
1126}
1127
1128MachineBasicBlock *MachineLICM::getCurPreheader() {
1129  // Determine the block to which to hoist instructions. If we can't find a
1130  // suitable loop predecessor, we can't do any hoisting.
1131
1132  // If we've tried to get a preheader and failed, don't try again.
1133  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1134    return 0;
1135
1136  if (!CurPreheader) {
1137    CurPreheader = CurLoop->getLoopPreheader();
1138    if (!CurPreheader) {
1139      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1140      if (!Pred) {
1141        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1142        return 0;
1143      }
1144
1145      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
1146      if (!CurPreheader) {
1147        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1148        return 0;
1149      }
1150    }
1151  }
1152  return CurPreheader;
1153}
1154