PeepholeOptimizer.cpp revision a8c58a6f59880a5bb2ef8fc86ebdeb15e96ed110
1//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Perform peephole optimizations on the machine code:
11//
12// - Optimize Extensions
13//
14//     Optimization of sign / zero extension instructions. It may be extended to
15//     handle other instructions with similar properties.
16//
17//     On some targets, some instructions, e.g. X86 sign / zero extension, may
18//     leave the source value in the lower part of the result. This optimization
19//     will replace some uses of the pre-extension value with uses of the
20//     sub-register of the results.
21//
22// - Optimize Comparisons
23//
24//     Optimization of comparison instructions. For instance, in this code:
25//
26//       sub r1, 1
27//       cmp r1, 0
28//       bz  L1
29//
30//     If the "sub" instruction all ready sets (or could be modified to set) the
31//     same flag that the "cmp" instruction sets and that "bz" uses, then we can
32//     eliminate the "cmp" instruction.
33//
34//===----------------------------------------------------------------------===//
35
36#define DEBUG_TYPE "peephole-opt"
37#include "llvm/CodeGen/Passes.h"
38#include "llvm/CodeGen/MachineDominators.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/Target/TargetInstrInfo.h"
42#include "llvm/Target/TargetRegisterInfo.h"
43#include "llvm/Support/CommandLine.h"
44#include "llvm/ADT/SmallPtrSet.h"
45#include "llvm/ADT/Statistic.h"
46using namespace llvm;
47
48// Optimize Extensions
49static cl::opt<bool>
50Aggressive("aggressive-ext-opt", cl::Hidden,
51           cl::desc("Aggressive extension optimization"));
52
53STATISTIC(NumReuse, "Number of extension results reused");
54
55// Optimize Comparisons
56static cl::opt<bool>
57EnableOptCmps("enable-optimize-cmps", cl::init(false), cl::Hidden);
58
59STATISTIC(NumEliminated, "Number of compares eliminated");
60
61namespace {
62  class PeepholeOptimizer : public MachineFunctionPass {
63    const TargetMachine   *TM;
64    const TargetInstrInfo *TII;
65    MachineRegisterInfo   *MRI;
66    MachineDominatorTree  *DT;  // Machine dominator tree
67
68  public:
69    static char ID; // Pass identification
70    PeepholeOptimizer() : MachineFunctionPass(ID) {}
71
72    virtual bool runOnMachineFunction(MachineFunction &MF);
73
74    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
75      AU.setPreservesCFG();
76      MachineFunctionPass::getAnalysisUsage(AU);
77      if (Aggressive) {
78        AU.addRequired<MachineDominatorTree>();
79        AU.addPreserved<MachineDominatorTree>();
80      }
81    }
82
83  private:
84    bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
85    bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
86                          SmallPtrSet<MachineInstr*, 8> &LocalMIs);
87  };
88}
89
90char PeepholeOptimizer::ID = 0;
91INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts",
92                "Peephole Optimizations", false, false);
93
94FunctionPass *llvm::createPeepholeOptimizerPass() {
95  return new PeepholeOptimizer();
96}
97
98/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
99/// a single register and writes a single register and it does not modify the
100/// source, and if the source value is preserved as a sub-register of the
101/// result, then replace all reachable uses of the source with the subreg of the
102/// result.
103///
104/// Do not generate an EXTRACT that is used only in a debug use, as this changes
105/// the code. Since this code does not currently share EXTRACTs, just ignore all
106/// debug uses.
107bool PeepholeOptimizer::
108OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
109                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
110  LocalMIs.insert(MI);
111
112  unsigned SrcReg, DstReg, SubIdx;
113  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
114    return false;
115
116  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
117      TargetRegisterInfo::isPhysicalRegister(SrcReg))
118    return false;
119
120  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
121  if (++UI == MRI->use_nodbg_end())
122    // No other uses.
123    return false;
124
125  // The source has other uses. See if we can replace the other uses with use of
126  // the result of the extension.
127  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
128  UI = MRI->use_nodbg_begin(DstReg);
129  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
130       UI != UE; ++UI)
131    ReachedBBs.insert(UI->getParent());
132
133  // Uses that are in the same BB of uses of the result of the instruction.
134  SmallVector<MachineOperand*, 8> Uses;
135
136  // Uses that the result of the instruction can reach.
137  SmallVector<MachineOperand*, 8> ExtendedUses;
138
139  bool ExtendLife = true;
140  UI = MRI->use_nodbg_begin(SrcReg);
141  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
142       UI != UE; ++UI) {
143    MachineOperand &UseMO = UI.getOperand();
144    MachineInstr *UseMI = &*UI;
145    if (UseMI == MI)
146      continue;
147
148    if (UseMI->isPHI()) {
149      ExtendLife = false;
150      continue;
151    }
152
153    // It's an error to translate this:
154    //
155    //    %reg1025 = <sext> %reg1024
156    //     ...
157    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
158    //
159    // into this:
160    //
161    //    %reg1025 = <sext> %reg1024
162    //     ...
163    //    %reg1027 = COPY %reg1025:4
164    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
165    //
166    // The problem here is that SUBREG_TO_REG is there to assert that an
167    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
168    // the COPY here, it will give us the value after the <sext>, not the
169    // original value of %reg1024 before <sext>.
170    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
171      continue;
172
173    MachineBasicBlock *UseMBB = UseMI->getParent();
174    if (UseMBB == MBB) {
175      // Local uses that come after the extension.
176      if (!LocalMIs.count(UseMI))
177        Uses.push_back(&UseMO);
178    } else if (ReachedBBs.count(UseMBB)) {
179      // Non-local uses where the result of the extension is used. Always
180      // replace these unless it's a PHI.
181      Uses.push_back(&UseMO);
182    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
183      // We may want to extend the live range of the extension result in order
184      // to replace these uses.
185      ExtendedUses.push_back(&UseMO);
186    } else {
187      // Both will be live out of the def MBB anyway. Don't extend live range of
188      // the extension result.
189      ExtendLife = false;
190      break;
191    }
192  }
193
194  if (ExtendLife && !ExtendedUses.empty())
195    // Extend the liveness of the extension result.
196    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
197              std::back_inserter(Uses));
198
199  // Now replace all uses.
200  bool Changed = false;
201  if (!Uses.empty()) {
202    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
203
204    // Look for PHI uses of the extended result, we don't want to extend the
205    // liveness of a PHI input. It breaks all kinds of assumptions down
206    // stream. A PHI use is expected to be the kill of its source values.
207    UI = MRI->use_nodbg_begin(DstReg);
208    for (MachineRegisterInfo::use_nodbg_iterator
209           UE = MRI->use_nodbg_end(); UI != UE; ++UI)
210      if (UI->isPHI())
211        PHIBBs.insert(UI->getParent());
212
213    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
214    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
215      MachineOperand *UseMO = Uses[i];
216      MachineInstr *UseMI = UseMO->getParent();
217      MachineBasicBlock *UseMBB = UseMI->getParent();
218      if (PHIBBs.count(UseMBB))
219        continue;
220
221      unsigned NewVR = MRI->createVirtualRegister(RC);
222      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
223              TII->get(TargetOpcode::COPY), NewVR)
224        .addReg(DstReg, 0, SubIdx);
225
226      UseMO->setReg(NewVR);
227      ++NumReuse;
228      Changed = true;
229    }
230  }
231
232  return Changed;
233}
234
235/// OptimizeCmpInstr - If the instruction is a compare and the previous
236/// instruction it's comparing against all ready sets (or could be modified to
237/// set) the same flag as the compare, then we can remove the comparison and use
238/// the flag from the previous instruction.
239bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
240                                         MachineBasicBlock *MBB) {
241  if (!EnableOptCmps) return false;
242
243  // If this instruction is a comparison against zero and isn't comparing a
244  // physical register, we can try to optimize it.
245  unsigned SrcReg;
246  int CmpValue;
247  if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) ||
248      TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0)
249    return false;
250
251  MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
252  if (llvm::next(DI) != MRI->def_end())
253    // Only support one definition.
254    return false;
255
256  // Attempt to convert the defining instruction to set the "zero" flag.
257  if (TII->ConvertToSetZeroFlag(&*DI, MI)) {
258    ++NumEliminated;
259    return true;
260  }
261
262  return false;
263}
264
265bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
266  TM  = &MF.getTarget();
267  TII = TM->getInstrInfo();
268  MRI = &MF.getRegInfo();
269  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
270
271  bool Changed = false;
272
273  SmallPtrSet<MachineInstr*, 8> LocalMIs;
274  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
275    MachineBasicBlock *MBB = &*I;
276    LocalMIs.clear();
277
278    for (MachineBasicBlock::iterator
279           MII = I->begin(), ME = I->end(); MII != ME; ) {
280      MachineInstr *MI = &*MII;
281
282      if (MI->getDesc().isCompare()) {
283        ++MII; // The iterator may become invalid if the compare is deleted.
284        Changed |= OptimizeCmpInstr(MI, MBB);
285      } else {
286        Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
287        ++MII;
288      }
289    }
290  }
291
292  return Changed;
293}
294