PeepholeOptimizer.cpp revision a8c58a6f59880a5bb2ef8fc86ebdeb15e96ed110
1//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Perform peephole optimizations on the machine code: 11// 12// - Optimize Extensions 13// 14// Optimization of sign / zero extension instructions. It may be extended to 15// handle other instructions with similar properties. 16// 17// On some targets, some instructions, e.g. X86 sign / zero extension, may 18// leave the source value in the lower part of the result. This optimization 19// will replace some uses of the pre-extension value with uses of the 20// sub-register of the results. 21// 22// - Optimize Comparisons 23// 24// Optimization of comparison instructions. For instance, in this code: 25// 26// sub r1, 1 27// cmp r1, 0 28// bz L1 29// 30// If the "sub" instruction all ready sets (or could be modified to set) the 31// same flag that the "cmp" instruction sets and that "bz" uses, then we can 32// eliminate the "cmp" instruction. 33// 34//===----------------------------------------------------------------------===// 35 36#define DEBUG_TYPE "peephole-opt" 37#include "llvm/CodeGen/Passes.h" 38#include "llvm/CodeGen/MachineDominators.h" 39#include "llvm/CodeGen/MachineInstrBuilder.h" 40#include "llvm/CodeGen/MachineRegisterInfo.h" 41#include "llvm/Target/TargetInstrInfo.h" 42#include "llvm/Target/TargetRegisterInfo.h" 43#include "llvm/Support/CommandLine.h" 44#include "llvm/ADT/SmallPtrSet.h" 45#include "llvm/ADT/Statistic.h" 46using namespace llvm; 47 48// Optimize Extensions 49static cl::opt<bool> 50Aggressive("aggressive-ext-opt", cl::Hidden, 51 cl::desc("Aggressive extension optimization")); 52 53STATISTIC(NumReuse, "Number of extension results reused"); 54 55// Optimize Comparisons 56static cl::opt<bool> 57EnableOptCmps("enable-optimize-cmps", cl::init(false), cl::Hidden); 58 59STATISTIC(NumEliminated, "Number of compares eliminated"); 60 61namespace { 62 class PeepholeOptimizer : public MachineFunctionPass { 63 const TargetMachine *TM; 64 const TargetInstrInfo *TII; 65 MachineRegisterInfo *MRI; 66 MachineDominatorTree *DT; // Machine dominator tree 67 68 public: 69 static char ID; // Pass identification 70 PeepholeOptimizer() : MachineFunctionPass(ID) {} 71 72 virtual bool runOnMachineFunction(MachineFunction &MF); 73 74 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 75 AU.setPreservesCFG(); 76 MachineFunctionPass::getAnalysisUsage(AU); 77 if (Aggressive) { 78 AU.addRequired<MachineDominatorTree>(); 79 AU.addPreserved<MachineDominatorTree>(); 80 } 81 } 82 83 private: 84 bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); 85 bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 86 SmallPtrSet<MachineInstr*, 8> &LocalMIs); 87 }; 88} 89 90char PeepholeOptimizer::ID = 0; 91INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts", 92 "Peephole Optimizations", false, false); 93 94FunctionPass *llvm::createPeepholeOptimizerPass() { 95 return new PeepholeOptimizer(); 96} 97 98/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads 99/// a single register and writes a single register and it does not modify the 100/// source, and if the source value is preserved as a sub-register of the 101/// result, then replace all reachable uses of the source with the subreg of the 102/// result. 103/// 104/// Do not generate an EXTRACT that is used only in a debug use, as this changes 105/// the code. Since this code does not currently share EXTRACTs, just ignore all 106/// debug uses. 107bool PeepholeOptimizer:: 108OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 109 SmallPtrSet<MachineInstr*, 8> &LocalMIs) { 110 LocalMIs.insert(MI); 111 112 unsigned SrcReg, DstReg, SubIdx; 113 if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) 114 return false; 115 116 if (TargetRegisterInfo::isPhysicalRegister(DstReg) || 117 TargetRegisterInfo::isPhysicalRegister(SrcReg)) 118 return false; 119 120 MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg); 121 if (++UI == MRI->use_nodbg_end()) 122 // No other uses. 123 return false; 124 125 // The source has other uses. See if we can replace the other uses with use of 126 // the result of the extension. 127 SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; 128 UI = MRI->use_nodbg_begin(DstReg); 129 for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); 130 UI != UE; ++UI) 131 ReachedBBs.insert(UI->getParent()); 132 133 // Uses that are in the same BB of uses of the result of the instruction. 134 SmallVector<MachineOperand*, 8> Uses; 135 136 // Uses that the result of the instruction can reach. 137 SmallVector<MachineOperand*, 8> ExtendedUses; 138 139 bool ExtendLife = true; 140 UI = MRI->use_nodbg_begin(SrcReg); 141 for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); 142 UI != UE; ++UI) { 143 MachineOperand &UseMO = UI.getOperand(); 144 MachineInstr *UseMI = &*UI; 145 if (UseMI == MI) 146 continue; 147 148 if (UseMI->isPHI()) { 149 ExtendLife = false; 150 continue; 151 } 152 153 // It's an error to translate this: 154 // 155 // %reg1025 = <sext> %reg1024 156 // ... 157 // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 158 // 159 // into this: 160 // 161 // %reg1025 = <sext> %reg1024 162 // ... 163 // %reg1027 = COPY %reg1025:4 164 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 165 // 166 // The problem here is that SUBREG_TO_REG is there to assert that an 167 // implicit zext occurs. It doesn't insert a zext instruction. If we allow 168 // the COPY here, it will give us the value after the <sext>, not the 169 // original value of %reg1024 before <sext>. 170 if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) 171 continue; 172 173 MachineBasicBlock *UseMBB = UseMI->getParent(); 174 if (UseMBB == MBB) { 175 // Local uses that come after the extension. 176 if (!LocalMIs.count(UseMI)) 177 Uses.push_back(&UseMO); 178 } else if (ReachedBBs.count(UseMBB)) { 179 // Non-local uses where the result of the extension is used. Always 180 // replace these unless it's a PHI. 181 Uses.push_back(&UseMO); 182 } else if (Aggressive && DT->dominates(MBB, UseMBB)) { 183 // We may want to extend the live range of the extension result in order 184 // to replace these uses. 185 ExtendedUses.push_back(&UseMO); 186 } else { 187 // Both will be live out of the def MBB anyway. Don't extend live range of 188 // the extension result. 189 ExtendLife = false; 190 break; 191 } 192 } 193 194 if (ExtendLife && !ExtendedUses.empty()) 195 // Extend the liveness of the extension result. 196 std::copy(ExtendedUses.begin(), ExtendedUses.end(), 197 std::back_inserter(Uses)); 198 199 // Now replace all uses. 200 bool Changed = false; 201 if (!Uses.empty()) { 202 SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; 203 204 // Look for PHI uses of the extended result, we don't want to extend the 205 // liveness of a PHI input. It breaks all kinds of assumptions down 206 // stream. A PHI use is expected to be the kill of its source values. 207 UI = MRI->use_nodbg_begin(DstReg); 208 for (MachineRegisterInfo::use_nodbg_iterator 209 UE = MRI->use_nodbg_end(); UI != UE; ++UI) 210 if (UI->isPHI()) 211 PHIBBs.insert(UI->getParent()); 212 213 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 214 for (unsigned i = 0, e = Uses.size(); i != e; ++i) { 215 MachineOperand *UseMO = Uses[i]; 216 MachineInstr *UseMI = UseMO->getParent(); 217 MachineBasicBlock *UseMBB = UseMI->getParent(); 218 if (PHIBBs.count(UseMBB)) 219 continue; 220 221 unsigned NewVR = MRI->createVirtualRegister(RC); 222 BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), 223 TII->get(TargetOpcode::COPY), NewVR) 224 .addReg(DstReg, 0, SubIdx); 225 226 UseMO->setReg(NewVR); 227 ++NumReuse; 228 Changed = true; 229 } 230 } 231 232 return Changed; 233} 234 235/// OptimizeCmpInstr - If the instruction is a compare and the previous 236/// instruction it's comparing against all ready sets (or could be modified to 237/// set) the same flag as the compare, then we can remove the comparison and use 238/// the flag from the previous instruction. 239bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, 240 MachineBasicBlock *MBB) { 241 if (!EnableOptCmps) return false; 242 243 // If this instruction is a comparison against zero and isn't comparing a 244 // physical register, we can try to optimize it. 245 unsigned SrcReg; 246 int CmpValue; 247 if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) || 248 TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0) 249 return false; 250 251 MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); 252 if (llvm::next(DI) != MRI->def_end()) 253 // Only support one definition. 254 return false; 255 256 // Attempt to convert the defining instruction to set the "zero" flag. 257 if (TII->ConvertToSetZeroFlag(&*DI, MI)) { 258 ++NumEliminated; 259 return true; 260 } 261 262 return false; 263} 264 265bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { 266 TM = &MF.getTarget(); 267 TII = TM->getInstrInfo(); 268 MRI = &MF.getRegInfo(); 269 DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0; 270 271 bool Changed = false; 272 273 SmallPtrSet<MachineInstr*, 8> LocalMIs; 274 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 275 MachineBasicBlock *MBB = &*I; 276 LocalMIs.clear(); 277 278 for (MachineBasicBlock::iterator 279 MII = I->begin(), ME = I->end(); MII != ME; ) { 280 MachineInstr *MI = &*MII; 281 282 if (MI->getDesc().isCompare()) { 283 ++MII; // The iterator may become invalid if the compare is deleted. 284 Changed |= OptimizeCmpInstr(MI, MBB); 285 } else { 286 Changed |= OptimizeExtInstr(MI, MBB, LocalMIs); 287 ++MII; 288 } 289 } 290 } 291 292 return Changed; 293} 294