TailDuplication.cpp revision e83c9b214205978873745b7368df84cf9f117996
1//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass duplicates basic blocks ending in unconditional branches into 11// the tails of their predecessors. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "tailduplication" 16#include "llvm/Function.h" 17#include "llvm/CodeGen/Passes.h" 18#include "llvm/CodeGen/MachineModuleInfo.h" 19#include "llvm/CodeGen/MachineFunctionPass.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/CodeGen/MachineSSAUpdater.h" 22#include "llvm/Target/TargetInstrInfo.h" 23#include "llvm/Support/CommandLine.h" 24#include "llvm/Support/Debug.h" 25#include "llvm/Support/raw_ostream.h" 26#include "llvm/ADT/SmallSet.h" 27#include "llvm/ADT/SetVector.h" 28#include "llvm/ADT/Statistic.h" 29using namespace llvm; 30 31STATISTIC(NumTailDups , "Number of tail duplicated blocks"); 32STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); 33STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); 34 35// Heuristic for tail duplication. 36static cl::opt<unsigned> 37TailDuplicateSize("tail-dup-size", 38 cl::desc("Maximum instructions to consider tail duplicating"), 39 cl::init(2), cl::Hidden); 40 41typedef std::vector<unsigned> AvailableValsTy; 42 43namespace { 44 /// TailDuplicatePass - Perform tail duplication. 45 class TailDuplicatePass : public MachineFunctionPass { 46 const TargetInstrInfo *TII; 47 MachineModuleInfo *MMI; 48 MachineRegisterInfo *MRI; 49 50 // SSAUpdateVRs - A list of virtual registers for which to update SSA form. 51 SmallVector<unsigned, 16> SSAUpdateVRs; 52 53 // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of 54 // source virtual registers. 55 DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; 56 57 public: 58 static char ID; 59 explicit TailDuplicatePass() : MachineFunctionPass(&ID) {} 60 61 virtual bool runOnMachineFunction(MachineFunction &MF); 62 virtual const char *getPassName() const { return "Tail Duplication"; } 63 64 private: 65 void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg); 66 bool TailDuplicateBlocks(MachineFunction &MF); 67 bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF); 68 void RemoveDeadBlock(MachineBasicBlock *MBB); 69 }; 70 71 char TailDuplicatePass::ID = 0; 72} 73 74FunctionPass *llvm::createTailDuplicatePass() { 75 return new TailDuplicatePass(); 76} 77 78bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { 79 TII = MF.getTarget().getInstrInfo(); 80 MRI = &MF.getRegInfo(); 81 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 82 83 bool MadeChange = false; 84 bool MadeChangeThisIteration = true; 85 while (MadeChangeThisIteration) { 86 MadeChangeThisIteration = false; 87 MadeChangeThisIteration |= TailDuplicateBlocks(MF); 88 MadeChange |= MadeChangeThisIteration; 89 } 90 91 return MadeChange; 92} 93 94/// TailDuplicateBlocks - Look for small blocks that are unconditionally 95/// branched to and do not fall through. Tail-duplicate their instructions 96/// into their predecessors to eliminate (dynamic) branches. 97bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { 98 bool MadeChange = false; 99 100 SSAUpdateVRs.clear(); 101 SSAUpdateVals.clear(); 102 103 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { 104 MachineBasicBlock *MBB = I++; 105 106 // Only duplicate blocks that end with unconditional branches. 107 if (MBB->canFallThrough()) 108 continue; 109 110 MadeChange |= TailDuplicate(MBB, MF); 111 112 // If it is dead, remove it. 113 if (MBB->pred_empty()) { 114 NumInstrDups -= MBB->size(); 115 RemoveDeadBlock(MBB); 116 MadeChange = true; 117 ++NumDeadBlocks; 118 } 119 } 120 121 if (!SSAUpdateVRs.empty()) { 122 // Update SSA form. 123 MachineSSAUpdater SSAUpdate(MF); 124 125 for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { 126 unsigned VReg = SSAUpdateVRs[i]; 127 SSAUpdate.Initialize(VReg); 128 129 // If the original definition is still around, add it as an available 130 // value. 131 MachineInstr *DefMI = MRI->getVRegDef(VReg); 132 MachineBasicBlock *DefBB = 0; 133 if (DefMI) { 134 DefBB = DefMI->getParent(); 135 SSAUpdate.AddAvailableValue(DefBB, VReg); 136 } 137 138 // Add the new vregs as available values. 139 DenseMap<unsigned, AvailableValsTy>::iterator LI = 140 SSAUpdateVals.find(VReg); 141 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 142 unsigned NewReg = LI->second[j]; 143 MachineInstr *DefMI = MRI->getVRegDef(NewReg); 144 SSAUpdate.AddAvailableValue(DefMI->getParent(), NewReg); 145 } 146 147 // Rewrite uses that are outside of the original def's block. 148 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg), 149 UE = MRI->use_end(); UI != UE; ++UI) { 150 MachineInstr *UseMI = &*UI; 151 if (UseMI->getParent() != DefBB) 152 SSAUpdate.RewriteUse(UI.getOperand()); 153 } 154 } 155 } 156 157 return MadeChange; 158} 159 160static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, 161 const MachineRegisterInfo *MRI) { 162 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), 163 UE = MRI->use_end(); UI != UE; ++UI) { 164 MachineInstr *UseMI = &*UI; 165 if (UseMI->getParent() != BB) 166 return true; 167 } 168 return false; 169} 170 171static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { 172 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) 173 if (MI->getOperand(i+1).getMBB() == SrcBB) 174 return i; 175 return 0; 176} 177 178/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for 179/// SSA update. 180void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg) { 181 DenseMap<unsigned, AvailableValsTy>::iterator LI = 182 SSAUpdateVals.find(OrigReg); 183 if (LI != SSAUpdateVals.end()) 184 LI->second.push_back(NewReg); 185 else { 186 AvailableValsTy Vals; 187 Vals.push_back(NewReg); 188 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); 189 SSAUpdateVRs.push_back(OrigReg); 190 } 191} 192 193/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each 194/// of its predecessors. 195bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, 196 MachineFunction &MF) { 197 // Don't try to tail-duplicate single-block loops. 198 if (TailBB->isSuccessor(TailBB)) 199 return false; 200 201 // Set the limit on the number of instructions to duplicate, with a default 202 // of one less than the tail-merge threshold. When optimizing for size, 203 // duplicate only one, because one branch instruction can be eliminated to 204 // compensate for the duplication. 205 unsigned MaxDuplicateCount; 206 if (!TailBB->empty() && TailBB->back().getDesc().isIndirectBranch()) 207 // If the target has hardware branch prediction that can handle indirect 208 // branches, duplicating them can often make them predictable when there 209 // are common paths through the code. The limit needs to be high enough 210 // to allow undoing the effects of tail merging. 211 MaxDuplicateCount = 20; 212 else if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 213 MaxDuplicateCount = 1; 214 else 215 MaxDuplicateCount = TailDuplicateSize; 216 217 // Check the instructions in the block to determine whether tail-duplication 218 // is invalid or unlikely to be profitable. 219 unsigned InstrCount = 0; 220 bool HasCall = false; 221 for (MachineBasicBlock::iterator I = TailBB->begin(); 222 I != TailBB->end(); ++I) { 223 // Non-duplicable things shouldn't be tail-duplicated. 224 if (I->getDesc().isNotDuplicable()) return false; 225 // Don't duplicate more than the threshold. 226 if (InstrCount == MaxDuplicateCount) return false; 227 // Remember if we saw a call. 228 if (I->getDesc().isCall()) HasCall = true; 229 if (I->getOpcode() != TargetInstrInfo::PHI) 230 InstrCount += 1; 231 } 232 // Heuristically, don't tail-duplicate calls if it would expand code size, 233 // as it's less likely to be worth the extra cost. 234 if (InstrCount > 1 && HasCall) 235 return false; 236 237 // Iterate through all the unique predecessors and tail-duplicate this 238 // block into them, if possible. Copying the list ahead of time also 239 // avoids trouble with the predecessor list reallocating. 240 bool Changed = false; 241 SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 242 TailBB->pred_end()); 243 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 244 PE = Preds.end(); PI != PE; ++PI) { 245 MachineBasicBlock *PredBB = *PI; 246 247 assert(TailBB != PredBB && 248 "Single-block loop should have been rejected earlier!"); 249 if (PredBB->succ_size() > 1) continue; 250 251 MachineBasicBlock *PredTBB, *PredFBB; 252 SmallVector<MachineOperand, 4> PredCond; 253 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 254 continue; 255 if (!PredCond.empty()) 256 continue; 257 // EH edges are ignored by AnalyzeBranch. 258 if (PredBB->succ_size() != 1) 259 continue; 260 // Don't duplicate into a fall-through predecessor (at least for now). 261 if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) 262 continue; 263 264 DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB 265 << "From Succ: " << *TailBB); 266 267 // Remove PredBB's unconditional branch. 268 TII->RemoveBranch(*PredBB); 269 270 // Clone the contents of TailBB into PredBB. 271 DenseMap<unsigned, unsigned> LocalVRMap; 272 MachineBasicBlock::iterator I = TailBB->begin(); 273 MachineBasicBlock::iterator NI; 274 for (MachineBasicBlock::iterator E = TailBB->end(); I != E; I = NI) { 275 NI = next(I); 276 if (I->getOpcode() == TargetInstrInfo::PHI) { 277 // Replace the uses of the def of the PHI with the register coming 278 // from PredBB. 279 unsigned DefReg = I->getOperand(0).getReg(); 280 unsigned SrcOpIdx = getPHISrcRegOpIdx(I, PredBB); 281 unsigned SrcReg = I->getOperand(SrcOpIdx).getReg(); 282 LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); 283 if (isDefLiveOut(DefReg, TailBB, MRI)) 284 AddSSAUpdateEntry(DefReg, SrcReg); 285 286 // Remove PredBB from the PHI node. 287 I->RemoveOperand(SrcOpIdx+1); 288 I->RemoveOperand(SrcOpIdx); 289 if (I->getNumOperands() == 1) 290 I->eraseFromParent(); 291 continue; 292 } 293 294 // Replace def of virtual registers with new registers, and update uses 295 // with PHI source register or the new registers. 296 MachineInstr *NewMI = MF.CloneMachineInstr(I); 297 for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { 298 MachineOperand &MO = NewMI->getOperand(i); 299 if (!MO.isReg()) 300 continue; 301 unsigned Reg = MO.getReg(); 302 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 303 continue; 304 if (MO.isDef()) { 305 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 306 unsigned NewReg = MRI->createVirtualRegister(RC); 307 MO.setReg(NewReg); 308 LocalVRMap.insert(std::make_pair(Reg, NewReg)); 309 if (isDefLiveOut(Reg, TailBB, MRI)) 310 AddSSAUpdateEntry(Reg, NewReg); 311 } else { 312 DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); 313 if (VI != LocalVRMap.end()) 314 MO.setReg(VI->second); 315 } 316 } 317 PredBB->insert(PredBB->end(), NewMI); 318 } 319 NumInstrDups += TailBB->size() - 1; // subtract one for removed branch 320 321 // Update the CFG. 322 PredBB->removeSuccessor(PredBB->succ_begin()); 323 assert(PredBB->succ_empty() && 324 "TailDuplicate called on block with multiple successors!"); 325 for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), 326 E = TailBB->succ_end(); I != E; ++I) 327 PredBB->addSuccessor(*I); 328 329 Changed = true; 330 ++NumTailDups; 331 } 332 333 // If TailBB was duplicated into all its predecessors except for the prior 334 // block, which falls through unconditionally, move the contents of this 335 // block into the prior block. 336 MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB)); 337 MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; 338 SmallVector<MachineOperand, 4> PriorCond; 339 bool PriorUnAnalyzable = 340 TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); 341 // This has to check PrevBB->succ_size() because EH edges are ignored by 342 // AnalyzeBranch. 343 if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && 344 TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 && 345 !TailBB->hasAddressTaken()) { 346 DEBUG(errs() << "\nMerging into block: " << PrevBB 347 << "From MBB: " << *TailBB); 348 PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end()); 349 PrevBB.removeSuccessor(PrevBB.succ_begin());; 350 assert(PrevBB.succ_empty()); 351 PrevBB.transferSuccessors(TailBB); 352 Changed = true; 353 } 354 355 return Changed; 356} 357 358/// RemoveDeadBlock - Remove the specified dead machine basic block from the 359/// function, updating the CFG. 360void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { 361 assert(MBB->pred_empty() && "MBB must be dead!"); 362 DEBUG(errs() << "\nRemoving MBB: " << *MBB); 363 364 // Remove all successors. 365 while (!MBB->succ_empty()) 366 MBB->removeSuccessor(MBB->succ_end()-1); 367 368 // If there are any labels in the basic block, unregister them from 369 // MachineModuleInfo. 370 if (MMI && !MBB->empty()) { 371 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); 372 I != E; ++I) { 373 if (I->isLabel()) 374 // The label ID # is always operand #0, an immediate. 375 MMI->InvalidateLabel(I->getOperand(0).getImm()); 376 } 377 } 378 379 // Remove the block. 380 MBB->eraseFromParent(); 381} 382 383