TailDuplication.cpp revision 289a27946fc11ae6b5a9aa517639d94efa4d91d9
1//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass duplicates basic blocks ending in unconditional branches into 11// the tails of their predecessors. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "tailduplication" 16#include "llvm/Function.h" 17#include "llvm/CodeGen/Passes.h" 18#include "llvm/CodeGen/MachineModuleInfo.h" 19#include "llvm/CodeGen/MachineFunctionPass.h" 20#include "llvm/CodeGen/MachineInstrBuilder.h" 21#include "llvm/CodeGen/MachineRegisterInfo.h" 22#include "llvm/CodeGen/MachineSSAUpdater.h" 23#include "llvm/Target/TargetInstrInfo.h" 24#include "llvm/Support/CommandLine.h" 25#include "llvm/Support/Debug.h" 26#include "llvm/Support/ErrorHandling.h" 27#include "llvm/Support/raw_ostream.h" 28#include "llvm/ADT/SmallSet.h" 29#include "llvm/ADT/SetVector.h" 30#include "llvm/ADT/Statistic.h" 31using namespace llvm; 32 33STATISTIC(NumTails , "Number of tails duplicated"); 34STATISTIC(NumTailDups , "Number of tail duplicated blocks"); 35STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); 36STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); 37STATISTIC(NumAddedPHIs , "Number of phis added"); 38 39// Heuristic for tail duplication. 40static cl::opt<unsigned> 41TailDuplicateSize("tail-dup-size", 42 cl::desc("Maximum instructions to consider tail duplicating"), 43 cl::init(2), cl::Hidden); 44 45static cl::opt<bool> 46TailDupVerify("tail-dup-verify", 47 cl::desc("Verify sanity of PHI instructions during taildup"), 48 cl::init(false), cl::Hidden); 49 50static cl::opt<unsigned> 51TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); 52 53typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; 54 55namespace { 56 /// TailDuplicatePass - Perform tail duplication. 57 class TailDuplicatePass : public MachineFunctionPass { 58 bool PreRegAlloc; 59 const TargetInstrInfo *TII; 60 MachineModuleInfo *MMI; 61 MachineRegisterInfo *MRI; 62 63 // SSAUpdateVRs - A list of virtual registers for which to update SSA form. 64 SmallVector<unsigned, 16> SSAUpdateVRs; 65 66 // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of 67 // source virtual registers. 68 DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; 69 70 public: 71 static char ID; 72 explicit TailDuplicatePass(bool PreRA) : 73 MachineFunctionPass(ID), PreRegAlloc(PreRA) {} 74 75 virtual bool runOnMachineFunction(MachineFunction &MF); 76 virtual const char *getPassName() const { return "Tail Duplication"; } 77 78 private: 79 void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 80 MachineBasicBlock *BB); 81 void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, 82 MachineBasicBlock *PredBB, 83 DenseMap<unsigned, unsigned> &LocalVRMap, 84 SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, 85 const DenseSet<unsigned> &UsedByPhi, 86 bool Remove); 87 void DuplicateInstruction(MachineInstr *MI, 88 MachineBasicBlock *TailBB, 89 MachineBasicBlock *PredBB, 90 MachineFunction &MF, 91 DenseMap<unsigned, unsigned> &LocalVRMap, 92 const DenseSet<unsigned> &UsedByPhi); 93 void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, 94 SmallVector<MachineBasicBlock*, 8> &TDBBs, 95 SmallSetVector<MachineBasicBlock*, 8> &Succs); 96 bool TailDuplicateBlocks(MachineFunction &MF); 97 bool shouldTailDuplicate(const MachineFunction &MF, 98 MachineBasicBlock &TailBB); 99 bool isSimpleBB(MachineBasicBlock *TailBB); 100 bool canCompletelyDuplicateSimpleBB(MachineBasicBlock &BB); 101 bool duplicateSimpleBB(MachineBasicBlock *TailBB, 102 SmallVector<MachineBasicBlock*, 8> &TDBBs, 103 const DenseSet<unsigned> &RegsUsedByPhi, 104 SmallVector<MachineInstr*, 16> &Copies); 105 bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, 106 SmallVector<MachineBasicBlock*, 8> &TDBBs, 107 SmallVector<MachineInstr*, 16> &Copies); 108 void RemoveDeadBlock(MachineBasicBlock *MBB); 109 }; 110 111 char TailDuplicatePass::ID = 0; 112} 113 114FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) { 115 return new TailDuplicatePass(PreRegAlloc); 116} 117 118bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { 119 TII = MF.getTarget().getInstrInfo(); 120 MRI = &MF.getRegInfo(); 121 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 122 123 bool MadeChange = false; 124 while (TailDuplicateBlocks(MF)) 125 MadeChange = true; 126 127 return MadeChange; 128} 129 130static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { 131 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { 132 MachineBasicBlock *MBB = I; 133 SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(), 134 MBB->pred_end()); 135 MachineBasicBlock::iterator MI = MBB->begin(); 136 while (MI != MBB->end()) { 137 if (!MI->isPHI()) 138 break; 139 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 140 PE = Preds.end(); PI != PE; ++PI) { 141 MachineBasicBlock *PredBB = *PI; 142 bool Found = false; 143 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 144 MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); 145 if (PHIBB == PredBB) { 146 Found = true; 147 break; 148 } 149 } 150 if (!Found) { 151 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 152 dbgs() << " missing input from predecessor BB#" 153 << PredBB->getNumber() << '\n'; 154 llvm_unreachable(0); 155 } 156 } 157 158 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 159 MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); 160 if (CheckExtra && !Preds.count(PHIBB)) { 161 dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() 162 << ": " << *MI; 163 dbgs() << " extra input from predecessor BB#" 164 << PHIBB->getNumber() << '\n'; 165 llvm_unreachable(0); 166 } 167 if (PHIBB->getNumber() < 0) { 168 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 169 dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; 170 llvm_unreachable(0); 171 } 172 } 173 ++MI; 174 } 175 } 176} 177 178/// TailDuplicateBlocks - Look for small blocks that are unconditionally 179/// branched to and do not fall through. Tail-duplicate their instructions 180/// into their predecessors to eliminate (dynamic) branches. 181bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { 182 bool MadeChange = false; 183 184 if (PreRegAlloc && TailDupVerify) { 185 DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); 186 VerifyPHIs(MF, true); 187 } 188 189 SmallVector<MachineInstr*, 8> NewPHIs; 190 MachineSSAUpdater SSAUpdate(MF, &NewPHIs); 191 192 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { 193 MachineBasicBlock *MBB = I++; 194 195 if (NumTails == TailDupLimit) 196 break; 197 198 // Save the successors list. 199 SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), 200 MBB->succ_end()); 201 202 SmallVector<MachineBasicBlock*, 8> TDBBs; 203 SmallVector<MachineInstr*, 16> Copies; 204 if (TailDuplicate(MBB, MF, TDBBs, Copies)) { 205 ++NumTails; 206 207 // TailBB's immediate successors are now successors of those predecessors 208 // which duplicated TailBB. Add the predecessors as sources to the PHI 209 // instructions. 210 bool isDead = MBB->pred_empty(); 211 if (PreRegAlloc) 212 UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); 213 214 // If it is dead, remove it. 215 if (isDead) { 216 NumInstrDups -= MBB->size(); 217 RemoveDeadBlock(MBB); 218 ++NumDeadBlocks; 219 } 220 221 // Update SSA form. 222 if (!SSAUpdateVRs.empty()) { 223 for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { 224 unsigned VReg = SSAUpdateVRs[i]; 225 SSAUpdate.Initialize(VReg); 226 227 // If the original definition is still around, add it as an available 228 // value. 229 MachineInstr *DefMI = MRI->getVRegDef(VReg); 230 MachineBasicBlock *DefBB = 0; 231 if (DefMI) { 232 DefBB = DefMI->getParent(); 233 SSAUpdate.AddAvailableValue(DefBB, VReg); 234 } 235 236 // Add the new vregs as available values. 237 DenseMap<unsigned, AvailableValsTy>::iterator LI = 238 SSAUpdateVals.find(VReg); 239 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 240 MachineBasicBlock *SrcBB = LI->second[j].first; 241 unsigned SrcReg = LI->second[j].second; 242 SSAUpdate.AddAvailableValue(SrcBB, SrcReg); 243 } 244 245 // Rewrite uses that are outside of the original def's block. 246 MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); 247 while (UI != MRI->use_end()) { 248 MachineOperand &UseMO = UI.getOperand(); 249 MachineInstr *UseMI = &*UI; 250 ++UI; 251 if (UseMI->isDebugValue()) { 252 // SSAUpdate can replace the use with an undef. That creates 253 // a debug instruction that is a kill. 254 // FIXME: Should it SSAUpdate job to delete debug instructions 255 // instead of replacing the use with undef? 256 UseMI->eraseFromParent(); 257 continue; 258 } 259 if (UseMI->getParent() == DefBB && !UseMI->isPHI()) 260 continue; 261 SSAUpdate.RewriteUse(UseMO); 262 } 263 } 264 265 SSAUpdateVRs.clear(); 266 SSAUpdateVals.clear(); 267 } 268 269 // Eliminate some of the copies inserted by tail duplication to maintain 270 // SSA form. 271 for (unsigned i = 0, e = Copies.size(); i != e; ++i) { 272 MachineInstr *Copy = Copies[i]; 273 if (!Copy->isCopy()) 274 continue; 275 unsigned Dst = Copy->getOperand(0).getReg(); 276 unsigned Src = Copy->getOperand(1).getReg(); 277 MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); 278 if (++UI == MRI->use_end()) { 279 // Copy is the only use. Do trivial copy propagation here. 280 MRI->replaceRegWith(Dst, Src); 281 Copy->eraseFromParent(); 282 } 283 } 284 285 if (PreRegAlloc && TailDupVerify) 286 VerifyPHIs(MF, false); 287 MadeChange = true; 288 } 289 } 290 NumAddedPHIs += NewPHIs.size(); 291 292 return MadeChange; 293} 294 295static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, 296 const MachineRegisterInfo *MRI) { 297 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), 298 UE = MRI->use_end(); UI != UE; ++UI) { 299 MachineInstr *UseMI = &*UI; 300 if (UseMI->isDebugValue()) 301 continue; 302 if (UseMI->getParent() != BB) 303 return true; 304 } 305 return false; 306} 307 308static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { 309 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) 310 if (MI->getOperand(i+1).getMBB() == SrcBB) 311 return i; 312 return 0; 313} 314 315 316// Remember which registers are used by phis in this block. This is 317// used to determine which registers are liveout while modifying the 318// block (which is why we need to copy the information). 319static void getRegsUsedByPHIs(const MachineBasicBlock &BB, 320 DenseSet<unsigned> *UsedByPhi) { 321 for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end(); 322 I != E; ++I) { 323 const MachineInstr &MI = *I; 324 if (!MI.isPHI()) 325 break; 326 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 327 unsigned SrcReg = MI.getOperand(i).getReg(); 328 UsedByPhi->insert(SrcReg); 329 } 330 } 331} 332 333/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for 334/// SSA update. 335void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 336 MachineBasicBlock *BB) { 337 DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg); 338 if (LI != SSAUpdateVals.end()) 339 LI->second.push_back(std::make_pair(BB, NewReg)); 340 else { 341 AvailableValsTy Vals; 342 Vals.push_back(std::make_pair(BB, NewReg)); 343 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); 344 SSAUpdateVRs.push_back(OrigReg); 345 } 346} 347 348/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. 349/// Remember the source register that's contributed by PredBB and update SSA 350/// update map. 351void TailDuplicatePass::ProcessPHI(MachineInstr *MI, 352 MachineBasicBlock *TailBB, 353 MachineBasicBlock *PredBB, 354 DenseMap<unsigned, unsigned> &LocalVRMap, 355 SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, 356 const DenseSet<unsigned> &RegsUsedByPhi, 357 bool Remove) { 358 unsigned DefReg = MI->getOperand(0).getReg(); 359 unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); 360 assert(SrcOpIdx && "Unable to find matching PHI source?"); 361 unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); 362 const TargetRegisterClass *RC = MRI->getRegClass(DefReg); 363 LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); 364 365 // Insert a copy from source to the end of the block. The def register is the 366 // available value liveout of the block. 367 unsigned NewDef = MRI->createVirtualRegister(RC); 368 Copies.push_back(std::make_pair(NewDef, SrcReg)); 369 if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) 370 AddSSAUpdateEntry(DefReg, NewDef, PredBB); 371 372 if (!Remove) 373 return; 374 375 // Remove PredBB from the PHI node. 376 MI->RemoveOperand(SrcOpIdx+1); 377 MI->RemoveOperand(SrcOpIdx); 378 if (MI->getNumOperands() == 1) 379 MI->eraseFromParent(); 380} 381 382/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update 383/// the source operands due to earlier PHI translation. 384void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, 385 MachineBasicBlock *TailBB, 386 MachineBasicBlock *PredBB, 387 MachineFunction &MF, 388 DenseMap<unsigned, unsigned> &LocalVRMap, 389 const DenseSet<unsigned> &UsedByPhi) { 390 MachineInstr *NewMI = TII->duplicate(MI, MF); 391 for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { 392 MachineOperand &MO = NewMI->getOperand(i); 393 if (!MO.isReg()) 394 continue; 395 unsigned Reg = MO.getReg(); 396 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 397 continue; 398 if (MO.isDef()) { 399 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 400 unsigned NewReg = MRI->createVirtualRegister(RC); 401 MO.setReg(NewReg); 402 LocalVRMap.insert(std::make_pair(Reg, NewReg)); 403 if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) 404 AddSSAUpdateEntry(Reg, NewReg, PredBB); 405 } else { 406 DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); 407 if (VI != LocalVRMap.end()) 408 MO.setReg(VI->second); 409 } 410 } 411 PredBB->insert(PredBB->end(), NewMI); 412} 413 414/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor 415/// blocks, the successors have gained new predecessors. Update the PHI 416/// instructions in them accordingly. 417void 418TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, 419 SmallVector<MachineBasicBlock*, 8> &TDBBs, 420 SmallSetVector<MachineBasicBlock*,8> &Succs) { 421 for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(), 422 SE = Succs.end(); SI != SE; ++SI) { 423 MachineBasicBlock *SuccBB = *SI; 424 for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); 425 II != EE; ++II) { 426 if (!II->isPHI()) 427 break; 428 unsigned Idx = 0; 429 for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { 430 MachineOperand &MO = II->getOperand(i+1); 431 if (MO.getMBB() == FromBB) { 432 Idx = i; 433 break; 434 } 435 } 436 437 assert(Idx != 0); 438 MachineOperand &MO0 = II->getOperand(Idx); 439 unsigned Reg = MO0.getReg(); 440 if (isDead) { 441 // Folded into the previous BB. 442 // There could be duplicate phi source entries. FIXME: Should sdisel 443 // or earlier pass fixed this? 444 for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) { 445 MachineOperand &MO = II->getOperand(i+1); 446 if (MO.getMBB() == FromBB) { 447 II->RemoveOperand(i+1); 448 II->RemoveOperand(i); 449 } 450 } 451 } else 452 Idx = 0; 453 454 // If Idx is set, the operands at Idx and Idx+1 must be removed. 455 // We reuse the location to avoid expensive RemoveOperand calls. 456 457 DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); 458 if (LI != SSAUpdateVals.end()) { 459 // This register is defined in the tail block. 460 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 461 MachineBasicBlock *SrcBB = LI->second[j].first; 462 // If we didn't duplicate a bb into a particular predecessor, we 463 // might still have added an entry to SSAUpdateVals to correcly 464 // recompute SSA. If that case, avoid adding a dummy extra argument 465 // this PHI. 466 if (!SrcBB->isSuccessor(SuccBB)) 467 continue; 468 469 unsigned SrcReg = LI->second[j].second; 470 if (Idx != 0) { 471 II->getOperand(Idx).setReg(SrcReg); 472 II->getOperand(Idx+1).setMBB(SrcBB); 473 Idx = 0; 474 } else { 475 II->addOperand(MachineOperand::CreateReg(SrcReg, false)); 476 II->addOperand(MachineOperand::CreateMBB(SrcBB)); 477 } 478 } 479 } else { 480 // Live in tail block, must also be live in predecessors. 481 for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { 482 MachineBasicBlock *SrcBB = TDBBs[j]; 483 if (Idx != 0) { 484 II->getOperand(Idx).setReg(Reg); 485 II->getOperand(Idx+1).setMBB(SrcBB); 486 Idx = 0; 487 } else { 488 II->addOperand(MachineOperand::CreateReg(Reg, false)); 489 II->addOperand(MachineOperand::CreateMBB(SrcBB)); 490 } 491 } 492 } 493 if (Idx != 0) { 494 II->RemoveOperand(Idx+1); 495 II->RemoveOperand(Idx); 496 } 497 } 498 } 499} 500 501/// shouldTailDuplicate - Determine if it is profitable to duplicate this block. 502bool 503TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, 504 MachineBasicBlock &TailBB) { 505 // Only duplicate blocks that end with unconditional branches. 506 if (TailBB.canFallThrough()) 507 return false; 508 509 // Don't try to tail-duplicate single-block loops. 510 if (TailBB.isSuccessor(&TailBB)) 511 return false; 512 513 // Set the limit on the cost to duplicate. When optimizing for size, 514 // duplicate only one, because one branch instruction can be eliminated to 515 // compensate for the duplication. 516 unsigned MaxDuplicateCount; 517 if (TailDuplicateSize.getNumOccurrences() == 0 && 518 MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 519 MaxDuplicateCount = 1; 520 else 521 MaxDuplicateCount = TailDuplicateSize; 522 523 // If the target has hardware branch prediction that can handle indirect 524 // branches, duplicating them can often make them predictable when there 525 // are common paths through the code. The limit needs to be high enough 526 // to allow undoing the effects of tail merging and other optimizations 527 // that rearrange the predecessors of the indirect branch. 528 529 if (PreRegAlloc && !TailBB.empty()) { 530 const TargetInstrDesc &TID = TailBB.back().getDesc(); 531 if (TID.isIndirectBranch()) 532 MaxDuplicateCount = 20; 533 } 534 535 // Check the instructions in the block to determine whether tail-duplication 536 // is invalid or unlikely to be profitable. 537 unsigned InstrCount = 0; 538 for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end(); 539 ++I) { 540 // Non-duplicable things shouldn't be tail-duplicated. 541 if (I->getDesc().isNotDuplicable()) 542 return false; 543 544 // Do not duplicate 'return' instructions if this is a pre-regalloc run. 545 // A return may expand into a lot more instructions (e.g. reload of callee 546 // saved registers) after PEI. 547 if (PreRegAlloc && I->getDesc().isReturn()) 548 return false; 549 550 // Avoid duplicating calls before register allocation. Calls presents a 551 // barrier to register allocation so duplicating them may end up increasing 552 // spills. 553 if (PreRegAlloc && I->getDesc().isCall()) 554 return false; 555 556 if (!I->isPHI() && !I->isDebugValue()) 557 InstrCount += 1; 558 559 if (InstrCount > MaxDuplicateCount) 560 return false; 561 } 562 563 return true; 564} 565 566/// isSimpleBB - True if this BB has only one unconditional jump. 567bool 568TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { 569 if (TailBB->succ_size() != 1) 570 return false; 571 MachineBasicBlock::iterator I = TailBB->getFirstNonPHI(); 572 MachineBasicBlock::iterator E = TailBB->end(); 573 while (I->isDebugValue() && I != E) 574 ++I; 575 if (I == E) 576 return true; 577 return I->getDesc().isUnconditionalBranch(); 578} 579 580static bool 581bothUsedInPHI(const MachineBasicBlock &A, 582 SmallPtrSet<MachineBasicBlock*, 8> SuccsB) { 583 for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(), 584 SE = A.succ_end(); SI != SE; ++SI) { 585 MachineBasicBlock *BB = *SI; 586 if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) 587 return true; 588 } 589 590 return false; 591} 592 593bool 594TailDuplicatePass::canCompletelyDuplicateSimpleBB(MachineBasicBlock &BB) { 595 SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end()); 596 597 for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), 598 PE = BB.pred_end(); PI != PE; ++PI) { 599 MachineBasicBlock *PredBB = *PI; 600 if (PredBB->getLandingPadSuccessor()) 601 return false; 602 if (bothUsedInPHI(*PredBB, Succs)) 603 return false; 604 MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; 605 SmallVector<MachineOperand, 4> PredCond; 606 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 607 return false; 608 } 609 return true; 610} 611 612bool 613TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, 614 SmallVector<MachineBasicBlock*, 8> &TDBBs, 615 const DenseSet<unsigned> &UsedByPhi, 616 SmallVector<MachineInstr*, 16> &Copies) { 617 if (!canCompletelyDuplicateSimpleBB(*TailBB)) 618 return false; 619 620 bool Changed = false; 621 SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), 622 TailBB->pred_end()); 623 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 624 PE = Preds.end(); PI != PE; ++PI) { 625 MachineBasicBlock *PredBB = *PI; 626 627 MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; 628 SmallVector<MachineOperand, 4> PredCond; 629 bool NotAnalyzable = 630 TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); 631 (void)NotAnalyzable; 632 assert(!NotAnalyzable && "Cannot duplicate this!"); 633 634 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 635 << "From simple Succ: " << *TailBB); 636 637 MachineBasicBlock *NewTarget = *TailBB->succ_begin(); 638 MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB)); 639 640 DenseMap<unsigned, unsigned> LocalVRMap; 641 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 642 for (MachineBasicBlock::iterator I = TailBB->begin(); 643 I != TailBB->end() && I->isPHI();) { 644 MachineInstr *MI = &*I; 645 ++I; 646 ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); 647 } 648 MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); 649 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 650 Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), 651 TII->get(TargetOpcode::COPY), 652 CopyInfos[i].first).addReg(CopyInfos[i].second)); 653 } 654 655 // Make PredFBB explicit. 656 if (PredCond.empty()) 657 PredFBB = PredTBB; 658 659 // Make fall through explicit. 660 if (!PredTBB) 661 PredTBB = NextBB; 662 if (!PredFBB) 663 PredFBB = NextBB; 664 665 // Redirect 666 if (PredFBB == TailBB) 667 PredFBB = NewTarget; 668 if (PredTBB == TailBB) 669 PredTBB = NewTarget; 670 671 // Make the branch unconditional if possible 672 if (PredTBB == PredFBB) 673 PredFBB = NULL; 674 675 // Avoid adding fall through branches. 676 if (PredFBB == NextBB) 677 PredFBB = NULL; 678 if (PredTBB == NextBB && PredFBB == NULL) 679 PredTBB = NULL; 680 681 TII->RemoveBranch(*PredBB); 682 683 if (PredTBB) 684 TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); 685 686 PredBB->removeSuccessor(TailBB); 687 PredBB->addSuccessor(NewTarget); 688 689 TDBBs.push_back(PredBB); 690 691 Changed = true; 692 } 693 return Changed; 694} 695 696/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each 697/// of its predecessors. 698bool 699TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, 700 SmallVector<MachineBasicBlock*, 8> &TDBBs, 701 SmallVector<MachineInstr*, 16> &Copies) { 702 if (!shouldTailDuplicate(MF, *TailBB)) 703 return false; 704 705 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); 706 707 DenseSet<unsigned> UsedByPhi; 708 getRegsUsedByPHIs(*TailBB, &UsedByPhi); 709 710 if (isSimpleBB(TailBB)) 711 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); 712 713 // Iterate through all the unique predecessors and tail-duplicate this 714 // block into them, if possible. Copying the list ahead of time also 715 // avoids trouble with the predecessor list reallocating. 716 bool Changed = false; 717 SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), 718 TailBB->pred_end()); 719 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 720 PE = Preds.end(); PI != PE; ++PI) { 721 MachineBasicBlock *PredBB = *PI; 722 723 assert(TailBB != PredBB && 724 "Single-block loop should have been rejected earlier!"); 725 // EH edges are ignored by AnalyzeBranch. 726 if (PredBB->succ_size() > 1) 727 continue; 728 729 MachineBasicBlock *PredTBB, *PredFBB; 730 SmallVector<MachineOperand, 4> PredCond; 731 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 732 continue; 733 if (!PredCond.empty()) 734 continue; 735 // Don't duplicate into a fall-through predecessor (at least for now). 736 if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) 737 continue; 738 739 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 740 << "From Succ: " << *TailBB); 741 742 TDBBs.push_back(PredBB); 743 744 // Remove PredBB's unconditional branch. 745 TII->RemoveBranch(*PredBB); 746 747 // Clone the contents of TailBB into PredBB. 748 DenseMap<unsigned, unsigned> LocalVRMap; 749 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 750 MachineBasicBlock::iterator I = TailBB->begin(); 751 while (I != TailBB->end()) { 752 MachineInstr *MI = &*I; 753 ++I; 754 if (MI->isPHI()) { 755 // Replace the uses of the def of the PHI with the register coming 756 // from PredBB. 757 ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); 758 } else { 759 // Replace def of virtual registers with new registers, and update 760 // uses with PHI source register or the new registers. 761 DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); 762 } 763 } 764 MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); 765 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 766 Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), 767 TII->get(TargetOpcode::COPY), 768 CopyInfos[i].first).addReg(CopyInfos[i].second)); 769 } 770 771 // Simplify 772 TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); 773 774 NumInstrDups += TailBB->size() - 1; // subtract one for removed branch 775 776 // Update the CFG. 777 PredBB->removeSuccessor(PredBB->succ_begin()); 778 assert(PredBB->succ_empty() && 779 "TailDuplicate called on block with multiple successors!"); 780 for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), 781 E = TailBB->succ_end(); I != E; ++I) 782 PredBB->addSuccessor(*I); 783 784 Changed = true; 785 ++NumTailDups; 786 } 787 788 // If TailBB was duplicated into all its predecessors except for the prior 789 // block, which falls through unconditionally, move the contents of this 790 // block into the prior block. 791 MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); 792 MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; 793 SmallVector<MachineOperand, 4> PriorCond; 794 // This has to check PrevBB->succ_size() because EH edges are ignored by 795 // AnalyzeBranch. 796 if (PrevBB->succ_size() == 1 && 797 !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && 798 PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && 799 !TailBB->hasAddressTaken()) { 800 DEBUG(dbgs() << "\nMerging into block: " << *PrevBB 801 << "From MBB: " << *TailBB); 802 if (PreRegAlloc) { 803 DenseMap<unsigned, unsigned> LocalVRMap; 804 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 805 MachineBasicBlock::iterator I = TailBB->begin(); 806 // Process PHI instructions first. 807 while (I != TailBB->end() && I->isPHI()) { 808 // Replace the uses of the def of the PHI with the register coming 809 // from PredBB. 810 MachineInstr *MI = &*I++; 811 ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); 812 if (MI->getParent()) 813 MI->eraseFromParent(); 814 } 815 816 // Now copy the non-PHI instructions. 817 while (I != TailBB->end()) { 818 // Replace def of virtual registers with new registers, and update 819 // uses with PHI source register or the new registers. 820 MachineInstr *MI = &*I++; 821 DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); 822 MI->eraseFromParent(); 823 } 824 MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); 825 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 826 Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), 827 TII->get(TargetOpcode::COPY), 828 CopyInfos[i].first) 829 .addReg(CopyInfos[i].second)); 830 } 831 } else { 832 // No PHIs to worry about, just splice the instructions over. 833 PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); 834 } 835 PrevBB->removeSuccessor(PrevBB->succ_begin()); 836 assert(PrevBB->succ_empty()); 837 PrevBB->transferSuccessors(TailBB); 838 TDBBs.push_back(PrevBB); 839 Changed = true; 840 } 841 842 // If this is after register allocation, there are no phis to fix. 843 if (!PreRegAlloc) 844 return Changed; 845 846 // If we made no changes so far, we are safe. 847 if (!Changed) 848 return Changed; 849 850 851 // Handle the nasty case in that we duplicated a block that is part of a loop 852 // into some but not all of its predecessors. For example: 853 // 1 -> 2 <-> 3 | 854 // \ | 855 // \---> rest | 856 // if we duplicate 2 into 1 but not into 3, we end up with 857 // 12 -> 3 <-> 2 -> rest | 858 // \ / | 859 // \----->-----/ | 860 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced 861 // with a phi in 3 (which now dominates 2). 862 // What we do here is introduce a copy in 3 of the register defined by the 863 // phi, just like when we are duplicating 2 into 3, but we don't copy any 864 // real instructions or remove the 3 -> 2 edge from the phi in 2. 865 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 866 PE = Preds.end(); PI != PE; ++PI) { 867 MachineBasicBlock *PredBB = *PI; 868 if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) 869 continue; 870 871 // EH edges 872 if (PredBB->succ_size() != 1) 873 continue; 874 875 DenseMap<unsigned, unsigned> LocalVRMap; 876 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 877 MachineBasicBlock::iterator I = TailBB->begin(); 878 // Process PHI instructions first. 879 while (I != TailBB->end() && I->isPHI()) { 880 // Replace the uses of the def of the PHI with the register coming 881 // from PredBB. 882 MachineInstr *MI = &*I++; 883 ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); 884 } 885 MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); 886 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 887 Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), 888 TII->get(TargetOpcode::COPY), 889 CopyInfos[i].first).addReg(CopyInfos[i].second)); 890 } 891 } 892 893 return Changed; 894} 895 896/// RemoveDeadBlock - Remove the specified dead machine basic block from the 897/// function, updating the CFG. 898void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { 899 assert(MBB->pred_empty() && "MBB must be dead!"); 900 DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); 901 902 // Remove all successors. 903 while (!MBB->succ_empty()) 904 MBB->removeSuccessor(MBB->succ_end()-1); 905 906 // Remove the block. 907 MBB->eraseFromParent(); 908} 909