// PeepholeOptimizer.cpp — revision 326d976eb2b03d1f2b7537d7b90dff264fd84378
1//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Perform peephole optimizations on the machine code: 11// 12// - Optimize Extensions 13// 14// Optimization of sign / zero extension instructions. It may be extended to 15// handle other instructions with similar properties. 16// 17// On some targets, some instructions, e.g. X86 sign / zero extension, may 18// leave the source value in the lower part of the result. This optimization 19// will replace some uses of the pre-extension value with uses of the 20// sub-register of the results. 21// 22// - Optimize Comparisons 23// 24// Optimization of comparison instructions. For instance, in this code: 25// 26// sub r1, 1 27// cmp r1, 0 28// bz L1 29// 30// If the "sub" instruction all ready sets (or could be modified to set) the 31// same flag that the "cmp" instruction sets and that "bz" uses, then we can 32// eliminate the "cmp" instruction. 
33// 34//===----------------------------------------------------------------------===// 35 36#define DEBUG_TYPE "peephole-opt" 37#include "llvm/CodeGen/Passes.h" 38#include "llvm/CodeGen/MachineDominators.h" 39#include "llvm/CodeGen/MachineInstrBuilder.h" 40#include "llvm/CodeGen/MachineRegisterInfo.h" 41#include "llvm/Target/TargetInstrInfo.h" 42#include "llvm/Target/TargetRegisterInfo.h" 43#include "llvm/Support/CommandLine.h" 44#include "llvm/ADT/DenseMap.h" 45#include "llvm/ADT/SmallPtrSet.h" 46#include "llvm/ADT/SmallSet.h" 47#include "llvm/ADT/Statistic.h" 48using namespace llvm; 49 50// Optimize Extensions 51static cl::opt<bool> 52Aggressive("aggressive-ext-opt", cl::Hidden, 53 cl::desc("Aggressive extension optimization")); 54 55static cl::opt<bool> 56DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), 57 cl::desc("Disable the peephole optimizer")); 58 59STATISTIC(NumReuse, "Number of extension results reused"); 60STATISTIC(NumEliminated, "Number of compares eliminated"); 61STATISTIC(NumImmFold, "Number of move immediate foled"); 62 63namespace { 64 class PeepholeOptimizer : public MachineFunctionPass { 65 const TargetMachine *TM; 66 const TargetInstrInfo *TII; 67 MachineRegisterInfo *MRI; 68 MachineDominatorTree *DT; // Machine dominator tree 69 70 public: 71 static char ID; // Pass identification 72 PeepholeOptimizer() : MachineFunctionPass(ID) { 73 initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); 74 } 75 76 virtual bool runOnMachineFunction(MachineFunction &MF); 77 78 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 79 AU.setPreservesCFG(); 80 MachineFunctionPass::getAnalysisUsage(AU); 81 if (Aggressive) { 82 AU.addRequired<MachineDominatorTree>(); 83 AU.addPreserved<MachineDominatorTree>(); 84 } 85 } 86 87 private: 88 bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); 89 bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 90 SmallPtrSet<MachineInstr*, 8> &LocalMIs); 91 bool 
isMoveImmediate(MachineInstr *MI, 92 SmallSet<unsigned, 4> &ImmDefRegs, 93 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 94 bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 95 SmallSet<unsigned, 4> &ImmDefRegs, 96 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 97 }; 98} 99 100char PeepholeOptimizer::ID = 0; 101INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", 102 "Peephole Optimizations", false, false) 103INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 104INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", 105 "Peephole Optimizations", false, false) 106 107FunctionPass *llvm::createPeepholeOptimizerPass() { 108 return new PeepholeOptimizer(); 109} 110 111/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads 112/// a single register and writes a single register and it does not modify the 113/// source, and if the source value is preserved as a sub-register of the 114/// result, then replace all reachable uses of the source with the subreg of the 115/// result. 116/// 117/// Do not generate an EXTRACT that is used only in a debug use, as this changes 118/// the code. Since this code does not currently share EXTRACTs, just ignore all 119/// debug uses. 120bool PeepholeOptimizer:: 121OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 122 SmallPtrSet<MachineInstr*, 8> &LocalMIs) { 123 unsigned SrcReg, DstReg, SubIdx; 124 if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) 125 return false; 126 127 if (TargetRegisterInfo::isPhysicalRegister(DstReg) || 128 TargetRegisterInfo::isPhysicalRegister(SrcReg)) 129 return false; 130 131 MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg); 132 if (++UI == MRI->use_nodbg_end()) 133 // No other uses. 134 return false; 135 136 // The source has other uses. See if we can replace the other uses with use of 137 // the result of the extension. 
138 SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; 139 UI = MRI->use_nodbg_begin(DstReg); 140 for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); 141 UI != UE; ++UI) 142 ReachedBBs.insert(UI->getParent()); 143 144 // Uses that are in the same BB of uses of the result of the instruction. 145 SmallVector<MachineOperand*, 8> Uses; 146 147 // Uses that the result of the instruction can reach. 148 SmallVector<MachineOperand*, 8> ExtendedUses; 149 150 bool ExtendLife = true; 151 UI = MRI->use_nodbg_begin(SrcReg); 152 for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); 153 UI != UE; ++UI) { 154 MachineOperand &UseMO = UI.getOperand(); 155 MachineInstr *UseMI = &*UI; 156 if (UseMI == MI) 157 continue; 158 159 if (UseMI->isPHI()) { 160 ExtendLife = false; 161 continue; 162 } 163 164 // It's an error to translate this: 165 // 166 // %reg1025 = <sext> %reg1024 167 // ... 168 // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 169 // 170 // into this: 171 // 172 // %reg1025 = <sext> %reg1024 173 // ... 174 // %reg1027 = COPY %reg1025:4 175 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 176 // 177 // The problem here is that SUBREG_TO_REG is there to assert that an 178 // implicit zext occurs. It doesn't insert a zext instruction. If we allow 179 // the COPY here, it will give us the value after the <sext>, not the 180 // original value of %reg1024 before <sext>. 181 if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) 182 continue; 183 184 MachineBasicBlock *UseMBB = UseMI->getParent(); 185 if (UseMBB == MBB) { 186 // Local uses that come after the extension. 187 if (!LocalMIs.count(UseMI)) 188 Uses.push_back(&UseMO); 189 } else if (ReachedBBs.count(UseMBB)) { 190 // Non-local uses where the result of the extension is used. Always 191 // replace these unless it's a PHI. 
192 Uses.push_back(&UseMO); 193 } else if (Aggressive && DT->dominates(MBB, UseMBB)) { 194 // We may want to extend the live range of the extension result in order 195 // to replace these uses. 196 ExtendedUses.push_back(&UseMO); 197 } else { 198 // Both will be live out of the def MBB anyway. Don't extend live range of 199 // the extension result. 200 ExtendLife = false; 201 break; 202 } 203 } 204 205 if (ExtendLife && !ExtendedUses.empty()) 206 // Extend the liveness of the extension result. 207 std::copy(ExtendedUses.begin(), ExtendedUses.end(), 208 std::back_inserter(Uses)); 209 210 // Now replace all uses. 211 bool Changed = false; 212 if (!Uses.empty()) { 213 SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; 214 215 // Look for PHI uses of the extended result, we don't want to extend the 216 // liveness of a PHI input. It breaks all kinds of assumptions down 217 // stream. A PHI use is expected to be the kill of its source values. 218 UI = MRI->use_nodbg_begin(DstReg); 219 for (MachineRegisterInfo::use_nodbg_iterator 220 UE = MRI->use_nodbg_end(); UI != UE; ++UI) 221 if (UI->isPHI()) 222 PHIBBs.insert(UI->getParent()); 223 224 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 225 for (unsigned i = 0, e = Uses.size(); i != e; ++i) { 226 MachineOperand *UseMO = Uses[i]; 227 MachineInstr *UseMI = UseMO->getParent(); 228 MachineBasicBlock *UseMBB = UseMI->getParent(); 229 if (PHIBBs.count(UseMBB)) 230 continue; 231 232 unsigned NewVR = MRI->createVirtualRegister(RC); 233 BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), 234 TII->get(TargetOpcode::COPY), NewVR) 235 .addReg(DstReg, 0, SubIdx); 236 237 UseMO->setReg(NewVR); 238 ++NumReuse; 239 Changed = true; 240 } 241 } 242 243 return Changed; 244} 245 246/// OptimizeCmpInstr - If the instruction is a compare and the previous 247/// instruction it's comparing against all ready sets (or could be modified to 248/// set) the same flag as the compare, then we can remove the comparison and use 249/// the flag from the 
previous instruction. 250bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, 251 MachineBasicBlock *MBB){ 252 // If this instruction is a comparison against zero and isn't comparing a 253 // physical register, we can try to optimize it. 254 unsigned SrcReg; 255 int CmpMask, CmpValue; 256 if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) || 257 TargetRegisterInfo::isPhysicalRegister(SrcReg)) 258 return false; 259 260 // Attempt to optimize the comparison instruction. 261 if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) { 262 ++NumEliminated; 263 return true; 264 } 265 266 return false; 267} 268 269bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, 270 SmallSet<unsigned, 4> &ImmDefRegs, 271 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 272 const TargetInstrDesc &TID = MI->getDesc(); 273 if (!TID.isMoveImmediate()) 274 return false; 275 if (TID.getNumDefs() != 1) 276 return false; 277 unsigned Reg = MI->getOperand(0).getReg(); 278 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 279 ImmDefMIs.insert(std::make_pair(Reg, MI)); 280 ImmDefRegs.insert(Reg); 281 return true; 282 } 283 284 return false; 285} 286 287/// FoldImmediate - Try folding register operands that are defined by move 288/// immediate instructions, i.e. a trivial constant folding optimization, if 289/// and only if the def and use are in the same BB. 
bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                                      SmallSet<unsigned, 4> &ImmDefRegs,
                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    // Only register *uses* are foldable; skip defs and non-register operands.
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Was this register defined by an immediate move recorded earlier in the
    // current block (see isMoveImmediate)?
    if (ImmDefRegs.count(Reg) == 0)
      continue;
    DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
    assert(II != ImmDefMIs.end());
    // Let the target fold the immediate def into MI; stop after one success.
    if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
      ++NumImmFold;
      return true;
    }
  }
  return false;
}

/// runOnMachineFunction - Drive the peepholes over every instruction of every
/// basic block: compare elimination, extension-result reuse, and immediate
/// folding. Returns true if anything changed.
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisablePeephole)
    return false;

  TM = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  // The dominator tree is only required (and requested) in aggressive mode.
  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  SmallSet<unsigned, 4> ImmDefRegs;
  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    // Per-block state: immediate defs are not tracked across blocks.
    bool SeenMoveImm = false;
    LocalMIs.clear();
    ImmDefRegs.clear();
    ImmDefMIs.clear();

    // PMII trails MII by one instruction so the scan can resume at the right
    // place if OptimizeCmpInstr erases MII; First is true until PMII is valid.
    bool First = true;
    MachineBasicBlock::iterator PMII;
    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      LocalMIs.insert(MI);

      // Skip instructions the peepholes cannot reason about safely.
      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
          MI->hasUnmodeledSideEffects()) {
        ++MII;
        continue;
      }

      if (MI->getDesc().isCompare()) {
        if (OptimizeCmpInstr(MI, MBB)) {
          // MI is deleted.
          Changed = true;
          MII = First ? I->begin() : llvm::next(PMII);
          continue;
        }
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
        // Only worth scanning operands once an immediate move has been seen.
        if (SeenMoveImm)
          Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      First = false;
      PMII = MII;
      ++MII;
    }
  }

  return Changed;
}