PeepholeOptimizer.cpp revision 30a343aeedf777f9b8b6be9823da750afbf765b1
1//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Perform peephole optimizations on the machine code:
11//
12// - Optimize Extensions
13//
14//     Optimization of sign / zero extension instructions. It may be extended to
15//     handle other instructions with similar properties.
16//
17//     On some targets, some instructions, e.g. X86 sign / zero extension, may
18//     leave the source value in the lower part of the result. This optimization
19//     will replace some uses of the pre-extension value with uses of the
20//     sub-register of the results.
21//
22// - Optimize Comparisons
23//
24//     Optimization of comparison instructions. For instance, in this code:
25//
26//       sub r1, 1
27//       cmp r1, 0
28//       bz  L1
29//
30//     If the "sub" instruction all ready sets (or could be modified to set) the
31//     same flag that the "cmp" instruction sets and that "bz" uses, then we can
32//     eliminate the "cmp" instruction.
33//
34//===----------------------------------------------------------------------===//
35
36#define DEBUG_TYPE "peephole-opt"
37#include "llvm/CodeGen/Passes.h"
38#include "llvm/CodeGen/MachineDominators.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/Target/TargetInstrInfo.h"
42#include "llvm/Target/TargetRegisterInfo.h"
43#include "llvm/Support/CommandLine.h"
44#include "llvm/ADT/DenseMap.h"
45#include "llvm/ADT/SmallPtrSet.h"
46#include "llvm/ADT/SmallSet.h"
47#include "llvm/ADT/Statistic.h"
48using namespace llvm;
49
50// Optimize Extensions
51static cl::opt<bool>
52Aggressive("aggressive-ext-opt", cl::Hidden,
53           cl::desc("Aggressive extension optimization"));
54
55static cl::opt<bool>
56DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
57                cl::desc("Disable the peephole optimizer"));
58
59STATISTIC(NumReuse,      "Number of extension results reused");
60STATISTIC(NumEliminated, "Number of compares eliminated");
61STATISTIC(NumImmFold,    "Number of move immediate foled");
62
63namespace {
64  class PeepholeOptimizer : public MachineFunctionPass {
65    const TargetMachine   *TM;
66    const TargetInstrInfo *TII;
67    MachineRegisterInfo   *MRI;
68    MachineDominatorTree  *DT;  // Machine dominator tree
69
70  public:
71    static char ID; // Pass identification
72    PeepholeOptimizer() : MachineFunctionPass(ID) {
73      initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
74    }
75
76    virtual bool runOnMachineFunction(MachineFunction &MF);
77
78    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
79      AU.setPreservesCFG();
80      MachineFunctionPass::getAnalysisUsage(AU);
81      if (Aggressive) {
82        AU.addRequired<MachineDominatorTree>();
83        AU.addPreserved<MachineDominatorTree>();
84      }
85    }
86
87  private:
88    bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
89    bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
90                          SmallPtrSet<MachineInstr*, 8> &LocalMIs);
91    bool isMoveImmediate(MachineInstr *MI,
92                         SmallSet<unsigned, 4> &ImmDefRegs,
93                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
94    bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
95                       SmallSet<unsigned, 4> &ImmDefRegs,
96                       DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
97  };
98}
99
100char PeepholeOptimizer::ID = 0;
101INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
102                "Peephole Optimizations", false, false)
103INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
104INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
105                "Peephole Optimizations", false, false)
106
107FunctionPass *llvm::createPeepholeOptimizerPass() {
108  return new PeepholeOptimizer();
109}
110
111/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
112/// a single register and writes a single register and it does not modify the
113/// source, and if the source value is preserved as a sub-register of the
114/// result, then replace all reachable uses of the source with the subreg of the
115/// result.
116///
117/// Do not generate an EXTRACT that is used only in a debug use, as this changes
118/// the code. Since this code does not currently share EXTRACTs, just ignore all
119/// debug uses.
120bool PeepholeOptimizer::
121OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
122                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
123  unsigned SrcReg, DstReg, SubIdx;
124  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
125    return false;
126
127  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
128      TargetRegisterInfo::isPhysicalRegister(SrcReg))
129    return false;
130
131  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
132  if (++UI == MRI->use_nodbg_end())
133    // No other uses.
134    return false;
135
136  // The source has other uses. See if we can replace the other uses with use of
137  // the result of the extension.
138  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
139  UI = MRI->use_nodbg_begin(DstReg);
140  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
141       UI != UE; ++UI)
142    ReachedBBs.insert(UI->getParent());
143
144  // Uses that are in the same BB of uses of the result of the instruction.
145  SmallVector<MachineOperand*, 8> Uses;
146
147  // Uses that the result of the instruction can reach.
148  SmallVector<MachineOperand*, 8> ExtendedUses;
149
150  bool ExtendLife = true;
151  UI = MRI->use_nodbg_begin(SrcReg);
152  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
153       UI != UE; ++UI) {
154    MachineOperand &UseMO = UI.getOperand();
155    MachineInstr *UseMI = &*UI;
156    if (UseMI == MI)
157      continue;
158
159    if (UseMI->isPHI()) {
160      ExtendLife = false;
161      continue;
162    }
163
164    // It's an error to translate this:
165    //
166    //    %reg1025 = <sext> %reg1024
167    //     ...
168    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
169    //
170    // into this:
171    //
172    //    %reg1025 = <sext> %reg1024
173    //     ...
174    //    %reg1027 = COPY %reg1025:4
175    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
176    //
177    // The problem here is that SUBREG_TO_REG is there to assert that an
178    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
179    // the COPY here, it will give us the value after the <sext>, not the
180    // original value of %reg1024 before <sext>.
181    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
182      continue;
183
184    MachineBasicBlock *UseMBB = UseMI->getParent();
185    if (UseMBB == MBB) {
186      // Local uses that come after the extension.
187      if (!LocalMIs.count(UseMI))
188        Uses.push_back(&UseMO);
189    } else if (ReachedBBs.count(UseMBB)) {
190      // Non-local uses where the result of the extension is used. Always
191      // replace these unless it's a PHI.
192      Uses.push_back(&UseMO);
193    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
194      // We may want to extend the live range of the extension result in order
195      // to replace these uses.
196      ExtendedUses.push_back(&UseMO);
197    } else {
198      // Both will be live out of the def MBB anyway. Don't extend live range of
199      // the extension result.
200      ExtendLife = false;
201      break;
202    }
203  }
204
205  if (ExtendLife && !ExtendedUses.empty())
206    // Extend the liveness of the extension result.
207    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
208              std::back_inserter(Uses));
209
210  // Now replace all uses.
211  bool Changed = false;
212  if (!Uses.empty()) {
213    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
214
215    // Look for PHI uses of the extended result, we don't want to extend the
216    // liveness of a PHI input. It breaks all kinds of assumptions down
217    // stream. A PHI use is expected to be the kill of its source values.
218    UI = MRI->use_nodbg_begin(DstReg);
219    for (MachineRegisterInfo::use_nodbg_iterator
220           UE = MRI->use_nodbg_end(); UI != UE; ++UI)
221      if (UI->isPHI())
222        PHIBBs.insert(UI->getParent());
223
224    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
225    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
226      MachineOperand *UseMO = Uses[i];
227      MachineInstr *UseMI = UseMO->getParent();
228      MachineBasicBlock *UseMBB = UseMI->getParent();
229      if (PHIBBs.count(UseMBB))
230        continue;
231
232      unsigned NewVR = MRI->createVirtualRegister(RC);
233      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
234              TII->get(TargetOpcode::COPY), NewVR)
235        .addReg(DstReg, 0, SubIdx);
236
237      UseMO->setReg(NewVR);
238      ++NumReuse;
239      Changed = true;
240    }
241  }
242
243  return Changed;
244}
245
246/// OptimizeCmpInstr - If the instruction is a compare and the previous
247/// instruction it's comparing against all ready sets (or could be modified to
248/// set) the same flag as the compare, then we can remove the comparison and use
249/// the flag from the previous instruction.
250bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
251                                         MachineBasicBlock *MBB){
252  // If this instruction is a comparison against zero and isn't comparing a
253  // physical register, we can try to optimize it.
254  unsigned SrcReg;
255  int CmpMask, CmpValue;
256  if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
257      TargetRegisterInfo::isPhysicalRegister(SrcReg))
258    return false;
259
260  // Attempt to optimize the comparison instruction.
261  if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
262    ++NumEliminated;
263    return true;
264  }
265
266  return false;
267}
268
269bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
270                                        SmallSet<unsigned, 4> &ImmDefRegs,
271                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
272  const TargetInstrDesc &TID = MI->getDesc();
273  if (!TID.isMoveImmediate())
274    return false;
275  if (TID.getNumDefs() != 1)
276    return false;
277  unsigned Reg = MI->getOperand(0).getReg();
278  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
279    ImmDefMIs.insert(std::make_pair(Reg, MI));
280    ImmDefRegs.insert(Reg);
281    return true;
282  }
283
284  return false;
285}
286
287/// FoldImmediate - Try folding register operands that are defined by move
288/// immediate instructions, i.e. a trivial constant folding optimization, if
289/// and only if the def and use are in the same BB.
290bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
291                                      SmallSet<unsigned, 4> &ImmDefRegs,
292                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
293  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
294    MachineOperand &MO = MI->getOperand(i);
295    if (!MO.isReg() || MO.isDef())
296      continue;
297    unsigned Reg = MO.getReg();
298    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
299      continue;
300    if (ImmDefRegs.count(Reg) == 0)
301      continue;
302    DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
303    assert(II != ImmDefMIs.end());
304    if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
305      ++NumImmFold;
306      return true;
307    }
308  }
309  return false;
310}
311
312bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
313  if (DisablePeephole)
314    return false;
315
316  TM  = &MF.getTarget();
317  TII = TM->getInstrInfo();
318  MRI = &MF.getRegInfo();
319  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
320
321  bool Changed = false;
322
323  SmallPtrSet<MachineInstr*, 8> LocalMIs;
324  SmallSet<unsigned, 4> ImmDefRegs;
325  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
326  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
327    MachineBasicBlock *MBB = &*I;
328
329    bool SeenMoveImm = false;
330    LocalMIs.clear();
331    ImmDefRegs.clear();
332    ImmDefMIs.clear();
333
334    for (MachineBasicBlock::iterator
335           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
336      MachineInstr *MI = &*MII++;
337      LocalMIs.insert(MI);
338
339      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
340          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
341          MI->getDesc().hasUnmodeledSideEffects())
342        continue;
343
344      if (MI->getDesc().isCompare()) {
345        Changed |= OptimizeCmpInstr(MI, MBB);
346      } else if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
347        SeenMoveImm = true;
348      } else {
349        Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
350        if (SeenMoveImm)
351          Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
352      }
353    }
354  }
355
356  return Changed;
357}
358