TailDuplication.cpp revision eb25bd2356cca5ef83472b1ae0f764037b82ad82
1//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass duplicates basic blocks ending in unconditional branches into 11// the tails of their predecessors. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "tailduplication" 16#include "llvm/Function.h" 17#include "llvm/CodeGen/Passes.h" 18#include "llvm/CodeGen/MachineModuleInfo.h" 19#include "llvm/CodeGen/MachineFunctionPass.h" 20#include "llvm/CodeGen/MachineInstrBuilder.h" 21#include "llvm/CodeGen/MachineRegisterInfo.h" 22#include "llvm/CodeGen/MachineSSAUpdater.h" 23#include "llvm/CodeGen/RegisterScavenging.h" 24#include "llvm/Target/TargetInstrInfo.h" 25#include "llvm/Target/TargetRegisterInfo.h" 26#include "llvm/Support/CommandLine.h" 27#include "llvm/Support/Debug.h" 28#include "llvm/Support/ErrorHandling.h" 29#include "llvm/Support/raw_ostream.h" 30#include "llvm/ADT/DenseSet.h" 31#include "llvm/ADT/SmallSet.h" 32#include "llvm/ADT/SetVector.h" 33#include "llvm/ADT/Statistic.h" 34using namespace llvm; 35 36STATISTIC(NumTails , "Number of tails duplicated"); 37STATISTIC(NumTailDups , "Number of tail duplicated blocks"); 38STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); 39STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); 40STATISTIC(NumAddedPHIs , "Number of phis added"); 41 42// Heuristic for tail duplication. 43static cl::opt<unsigned> 44TailDuplicateSize("tail-dup-size", 45 cl::desc("Maximum instructions to consider tail duplicating"), 46 cl::init(2), cl::Hidden); 47 48static cl::opt<bool> 49TailDupVerify("tail-dup-verify", 50 cl::desc("Verify sanity of PHI instructions during taildup"), 51 cl::init(false), cl::Hidden); 52 53static cl::opt<unsigned> 54TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); 55 56typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; 57 58namespace { 59 /// TailDuplicatePass - Perform tail duplication. 60 class TailDuplicatePass : public MachineFunctionPass { 61 const TargetInstrInfo *TII; 62 const TargetRegisterInfo *TRI; 63 MachineModuleInfo *MMI; 64 MachineRegisterInfo *MRI; 65 RegScavenger *RS; 66 bool PreRegAlloc; 67 68 // SSAUpdateVRs - A list of virtual registers for which to update SSA form. 69 SmallVector<unsigned, 16> SSAUpdateVRs; 70 71 // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of 72 // source virtual registers. 73 DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; 74 75 public: 76 static char ID; 77 explicit TailDuplicatePass() : 78 MachineFunctionPass(ID), PreRegAlloc(false) {} 79 80 virtual bool runOnMachineFunction(MachineFunction &MF); 81 82 private: 83 void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 84 MachineBasicBlock *BB); 85 void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, 86 MachineBasicBlock *PredBB, 87 DenseMap<unsigned, unsigned> &LocalVRMap, 88 SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, 89 const DenseSet<unsigned> &UsedByPhi, 90 bool Remove); 91 void DuplicateInstruction(MachineInstr *MI, 92 MachineBasicBlock *TailBB, 93 MachineBasicBlock *PredBB, 94 MachineFunction &MF, 95 DenseMap<unsigned, unsigned> &LocalVRMap, 96 const DenseSet<unsigned> &UsedByPhi); 97 void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, 98 SmallVector<MachineBasicBlock*, 8> &TDBBs, 99 SmallSetVector<MachineBasicBlock*, 8> &Succs); 100 bool TailDuplicateBlocks(MachineFunction &MF); 101 bool shouldTailDuplicate(const MachineFunction &MF, 102 bool IsSimple, MachineBasicBlock &TailBB); 103 bool isSimpleBB(MachineBasicBlock *TailBB); 104 bool canCompletelyDuplicateBB(MachineBasicBlock &BB); 105 bool duplicateSimpleBB(MachineBasicBlock *TailBB, 106 SmallVector<MachineBasicBlock*, 8> &TDBBs, 107 const DenseSet<unsigned> &RegsUsedByPhi, 108 SmallVector<MachineInstr*, 16> &Copies); 109 bool TailDuplicate(MachineBasicBlock *TailBB, 110 bool IsSimple, 111 MachineFunction &MF, 112 SmallVector<MachineBasicBlock*, 8> &TDBBs, 113 SmallVector<MachineInstr*, 16> &Copies); 114 bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, 115 bool IsSimple, 116 MachineFunction &MF); 117 118 void RemoveDeadBlock(MachineBasicBlock *MBB); 119 }; 120 121 char TailDuplicatePass::ID = 0; 122} 123 124char &llvm::TailDuplicateID = TailDuplicatePass::ID; 125 126INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", 127 false, false) 128 129bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { 130 TII = MF.getTarget().getInstrInfo(); 131 TRI = MF.getTarget().getRegisterInfo(); 132 MRI = &MF.getRegInfo(); 133 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 134 PreRegAlloc = MRI->isSSA(); 135 RS = NULL; 136 if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) 137 RS = new RegScavenger(); 138 139 bool MadeChange = false; 140 while (TailDuplicateBlocks(MF)) 141 MadeChange = true; 142 143 return MadeChange; 144} 145 146static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { 147 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { 148 MachineBasicBlock *MBB = I; 149 SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(), 150 MBB->pred_end()); 151 MachineBasicBlock::iterator MI = MBB->begin(); 152 while (MI != MBB->end()) { 153 if (!MI->isPHI()) 154 break; 155 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 156 PE = Preds.end(); PI != PE; ++PI) { 157 MachineBasicBlock *PredBB = *PI; 158 bool Found = false; 159 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 160 MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); 161 if (PHIBB == PredBB) { 162 Found = true; 163 break; 164 } 165 } 166 if (!Found) { 167 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 168 dbgs() << " missing input from predecessor BB#" 169 << PredBB->getNumber() << '\n'; 170 llvm_unreachable(0); 171 } 172 } 173 174 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 175 MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); 176 if (CheckExtra && !Preds.count(PHIBB)) { 177 dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() 178 << ": " << *MI; 179 dbgs() << " extra input from predecessor BB#" 180 << PHIBB->getNumber() << '\n'; 181 llvm_unreachable(0); 182 } 183 if (PHIBB->getNumber() < 0) { 184 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 185 dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; 186 llvm_unreachable(0); 187 } 188 } 189 ++MI; 190 } 191 } 192} 193 194/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup. 195bool 196TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, 197 bool IsSimple, 198 MachineFunction &MF) { 199 // Save the successors list. 200 SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), 201 MBB->succ_end()); 202 203 SmallVector<MachineBasicBlock*, 8> TDBBs; 204 SmallVector<MachineInstr*, 16> Copies; 205 if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies)) 206 return false; 207 208 ++NumTails; 209 210 SmallVector<MachineInstr*, 8> NewPHIs; 211 MachineSSAUpdater SSAUpdate(MF, &NewPHIs); 212 213 // TailBB's immediate successors are now successors of those predecessors 214 // which duplicated TailBB. Add the predecessors as sources to the PHI 215 // instructions. 216 bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); 217 if (PreRegAlloc) 218 UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); 219 220 // If it is dead, remove it. 221 if (isDead) { 222 NumInstrDups -= MBB->size(); 223 RemoveDeadBlock(MBB); 224 ++NumDeadBlocks; 225 } 226 227 // Update SSA form. 228 if (!SSAUpdateVRs.empty()) { 229 for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { 230 unsigned VReg = SSAUpdateVRs[i]; 231 SSAUpdate.Initialize(VReg); 232 233 // If the original definition is still around, add it as an available 234 // value. 235 MachineInstr *DefMI = MRI->getVRegDef(VReg); 236 MachineBasicBlock *DefBB = 0; 237 if (DefMI) { 238 DefBB = DefMI->getParent(); 239 SSAUpdate.AddAvailableValue(DefBB, VReg); 240 } 241 242 // Add the new vregs as available values. 243 DenseMap<unsigned, AvailableValsTy>::iterator LI = 244 SSAUpdateVals.find(VReg); 245 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 246 MachineBasicBlock *SrcBB = LI->second[j].first; 247 unsigned SrcReg = LI->second[j].second; 248 SSAUpdate.AddAvailableValue(SrcBB, SrcReg); 249 } 250 251 // Rewrite uses that are outside of the original def's block. 252 MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); 253 while (UI != MRI->use_end()) { 254 MachineOperand &UseMO = UI.getOperand(); 255 MachineInstr *UseMI = &*UI; 256 ++UI; 257 if (UseMI->isDebugValue()) { 258 // SSAUpdate can replace the use with an undef. That creates 259 // a debug instruction that is a kill. 260 // FIXME: Should it SSAUpdate job to delete debug instructions 261 // instead of replacing the use with undef? 262 UseMI->eraseFromParent(); 263 continue; 264 } 265 if (UseMI->getParent() == DefBB && !UseMI->isPHI()) 266 continue; 267 SSAUpdate.RewriteUse(UseMO); 268 } 269 } 270 271 SSAUpdateVRs.clear(); 272 SSAUpdateVals.clear(); 273 } 274 275 // Eliminate some of the copies inserted by tail duplication to maintain 276 // SSA form. 277 for (unsigned i = 0, e = Copies.size(); i != e; ++i) { 278 MachineInstr *Copy = Copies[i]; 279 if (!Copy->isCopy()) 280 continue; 281 unsigned Dst = Copy->getOperand(0).getReg(); 282 unsigned Src = Copy->getOperand(1).getReg(); 283 if (MRI->hasOneNonDBGUse(Src) && 284 MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { 285 // Copy is the only use. Do trivial copy propagation here. 286 MRI->replaceRegWith(Dst, Src); 287 Copy->eraseFromParent(); 288 } 289 } 290 291 if (NewPHIs.size()) 292 NumAddedPHIs += NewPHIs.size(); 293 294 return true; 295} 296 297/// TailDuplicateBlocks - Look for small blocks that are unconditionally 298/// branched to and do not fall through. Tail-duplicate their instructions 299/// into their predecessors to eliminate (dynamic) branches. 300bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { 301 bool MadeChange = false; 302 303 if (PreRegAlloc && TailDupVerify) { 304 DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); 305 VerifyPHIs(MF, true); 306 } 307 308 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { 309 MachineBasicBlock *MBB = I++; 310 311 if (NumTails == TailDupLimit) 312 break; 313 314 bool IsSimple = isSimpleBB(MBB); 315 316 if (!shouldTailDuplicate(MF, IsSimple, *MBB)) 317 continue; 318 319 MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF); 320 } 321 322 if (PreRegAlloc && TailDupVerify) 323 VerifyPHIs(MF, false); 324 325 return MadeChange; 326} 327 328static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, 329 const MachineRegisterInfo *MRI) { 330 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), 331 UE = MRI->use_end(); UI != UE; ++UI) { 332 MachineInstr *UseMI = &*UI; 333 if (UseMI->isDebugValue()) 334 continue; 335 if (UseMI->getParent() != BB) 336 return true; 337 } 338 return false; 339} 340 341static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { 342 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) 343 if (MI->getOperand(i+1).getMBB() == SrcBB) 344 return i; 345 return 0; 346} 347 348 349// Remember which registers are used by phis in this block. This is 350// used to determine which registers are liveout while modifying the 351// block (which is why we need to copy the information). 352static void getRegsUsedByPHIs(const MachineBasicBlock &BB, 353 DenseSet<unsigned> *UsedByPhi) { 354 for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end(); 355 I != E; ++I) { 356 const MachineInstr &MI = *I; 357 if (!MI.isPHI()) 358 break; 359 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 360 unsigned SrcReg = MI.getOperand(i).getReg(); 361 UsedByPhi->insert(SrcReg); 362 } 363 } 364} 365 366/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for 367/// SSA update. 368void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 369 MachineBasicBlock *BB) { 370 DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg); 371 if (LI != SSAUpdateVals.end()) 372 LI->second.push_back(std::make_pair(BB, NewReg)); 373 else { 374 AvailableValsTy Vals; 375 Vals.push_back(std::make_pair(BB, NewReg)); 376 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); 377 SSAUpdateVRs.push_back(OrigReg); 378 } 379} 380 381/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. 382/// Remember the source register that's contributed by PredBB and update SSA 383/// update map. 384void TailDuplicatePass::ProcessPHI(MachineInstr *MI, 385 MachineBasicBlock *TailBB, 386 MachineBasicBlock *PredBB, 387 DenseMap<unsigned, unsigned> &LocalVRMap, 388 SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, 389 const DenseSet<unsigned> &RegsUsedByPhi, 390 bool Remove) { 391 unsigned DefReg = MI->getOperand(0).getReg(); 392 unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); 393 assert(SrcOpIdx && "Unable to find matching PHI source?"); 394 unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); 395 const TargetRegisterClass *RC = MRI->getRegClass(DefReg); 396 LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); 397 398 // Insert a copy from source to the end of the block. The def register is the 399 // available value liveout of the block. 400 unsigned NewDef = MRI->createVirtualRegister(RC); 401 Copies.push_back(std::make_pair(NewDef, SrcReg)); 402 if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) 403 AddSSAUpdateEntry(DefReg, NewDef, PredBB); 404 405 if (!Remove) 406 return; 407 408 // Remove PredBB from the PHI node. 409 MI->RemoveOperand(SrcOpIdx+1); 410 MI->RemoveOperand(SrcOpIdx); 411 if (MI->getNumOperands() == 1) 412 MI->eraseFromParent(); 413} 414 415/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update 416/// the source operands due to earlier PHI translation. 417void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, 418 MachineBasicBlock *TailBB, 419 MachineBasicBlock *PredBB, 420 MachineFunction &MF, 421 DenseMap<unsigned, unsigned> &LocalVRMap, 422 const DenseSet<unsigned> &UsedByPhi) { 423 MachineInstr *NewMI = TII->duplicate(MI, MF); 424 for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { 425 MachineOperand &MO = NewMI->getOperand(i); 426 if (!MO.isReg()) 427 continue; 428 unsigned Reg = MO.getReg(); 429 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 430 continue; 431 if (MO.isDef()) { 432 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 433 unsigned NewReg = MRI->createVirtualRegister(RC); 434 MO.setReg(NewReg); 435 LocalVRMap.insert(std::make_pair(Reg, NewReg)); 436 if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) 437 AddSSAUpdateEntry(Reg, NewReg, PredBB); 438 } else { 439 DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); 440 if (VI != LocalVRMap.end()) { 441 MO.setReg(VI->second); 442 MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg)); 443 } 444 } 445 } 446 PredBB->insert(PredBB->instr_end(), NewMI); 447} 448 449/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor 450/// blocks, the successors have gained new predecessors. Update the PHI 451/// instructions in them accordingly. 452void 453TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, 454 SmallVector<MachineBasicBlock*, 8> &TDBBs, 455 SmallSetVector<MachineBasicBlock*,8> &Succs) { 456 for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(), 457 SE = Succs.end(); SI != SE; ++SI) { 458 MachineBasicBlock *SuccBB = *SI; 459 for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); 460 II != EE; ++II) { 461 if (!II->isPHI()) 462 break; 463 unsigned Idx = 0; 464 for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { 465 MachineOperand &MO = II->getOperand(i+1); 466 if (MO.getMBB() == FromBB) { 467 Idx = i; 468 break; 469 } 470 } 471 472 assert(Idx != 0); 473 MachineOperand &MO0 = II->getOperand(Idx); 474 unsigned Reg = MO0.getReg(); 475 if (isDead) { 476 // Folded into the previous BB. 477 // There could be duplicate phi source entries. FIXME: Should sdisel 478 // or earlier pass fixed this? 479 for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) { 480 MachineOperand &MO = II->getOperand(i+1); 481 if (MO.getMBB() == FromBB) { 482 II->RemoveOperand(i+1); 483 II->RemoveOperand(i); 484 } 485 } 486 } else 487 Idx = 0; 488 489 // If Idx is set, the operands at Idx and Idx+1 must be removed. 490 // We reuse the location to avoid expensive RemoveOperand calls. 491 492 DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); 493 if (LI != SSAUpdateVals.end()) { 494 // This register is defined in the tail block. 495 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 496 MachineBasicBlock *SrcBB = LI->second[j].first; 497 // If we didn't duplicate a bb into a particular predecessor, we 498 // might still have added an entry to SSAUpdateVals to correcly 499 // recompute SSA. If that case, avoid adding a dummy extra argument 500 // this PHI. 501 if (!SrcBB->isSuccessor(SuccBB)) 502 continue; 503 504 unsigned SrcReg = LI->second[j].second; 505 if (Idx != 0) { 506 II->getOperand(Idx).setReg(SrcReg); 507 II->getOperand(Idx+1).setMBB(SrcBB); 508 Idx = 0; 509 } else { 510 II->addOperand(MachineOperand::CreateReg(SrcReg, false)); 511 II->addOperand(MachineOperand::CreateMBB(SrcBB)); 512 } 513 } 514 } else { 515 // Live in tail block, must also be live in predecessors. 516 for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { 517 MachineBasicBlock *SrcBB = TDBBs[j]; 518 if (Idx != 0) { 519 II->getOperand(Idx).setReg(Reg); 520 II->getOperand(Idx+1).setMBB(SrcBB); 521 Idx = 0; 522 } else { 523 II->addOperand(MachineOperand::CreateReg(Reg, false)); 524 II->addOperand(MachineOperand::CreateMBB(SrcBB)); 525 } 526 } 527 } 528 if (Idx != 0) { 529 II->RemoveOperand(Idx+1); 530 II->RemoveOperand(Idx); 531 } 532 } 533 } 534} 535 536/// shouldTailDuplicate - Determine if it is profitable to duplicate this block. 537bool 538TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, 539 bool IsSimple, 540 MachineBasicBlock &TailBB) { 541 // Only duplicate blocks that end with unconditional branches. 542 if (TailBB.canFallThrough()) 543 return false; 544 545 // Don't try to tail-duplicate single-block loops. 546 if (TailBB.isSuccessor(&TailBB)) 547 return false; 548 549 // Set the limit on the cost to duplicate. When optimizing for size, 550 // duplicate only one, because one branch instruction can be eliminated to 551 // compensate for the duplication. 552 unsigned MaxDuplicateCount; 553 if (TailDuplicateSize.getNumOccurrences() == 0 && 554 MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 555 MaxDuplicateCount = 1; 556 else 557 MaxDuplicateCount = TailDuplicateSize; 558 559 // If the target has hardware branch prediction that can handle indirect 560 // branches, duplicating them can often make them predictable when there 561 // are common paths through the code. The limit needs to be high enough 562 // to allow undoing the effects of tail merging and other optimizations 563 // that rearrange the predecessors of the indirect branch. 564 565 bool HasIndirectbr = false; 566 if (!TailBB.empty()) 567 HasIndirectbr = TailBB.back().isIndirectBranch(); 568 569 if (HasIndirectbr && PreRegAlloc) 570 MaxDuplicateCount = 20; 571 572 // Check the instructions in the block to determine whether tail-duplication 573 // is invalid or unlikely to be profitable. 574 unsigned InstrCount = 0; 575 for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) { 576 // Non-duplicable things shouldn't be tail-duplicated. 577 if (I->isNotDuplicable()) 578 return false; 579 580 // Do not duplicate 'return' instructions if this is a pre-regalloc run. 581 // A return may expand into a lot more instructions (e.g. reload of callee 582 // saved registers) after PEI. 583 if (PreRegAlloc && I->isReturn()) 584 return false; 585 586 // Avoid duplicating calls before register allocation. Calls presents a 587 // barrier to register allocation so duplicating them may end up increasing 588 // spills. 589 if (PreRegAlloc && I->isCall()) 590 return false; 591 592 if (!I->isPHI() && !I->isDebugValue()) 593 InstrCount += 1; 594 595 if (InstrCount > MaxDuplicateCount) 596 return false; 597 } 598 599 if (HasIndirectbr && PreRegAlloc) 600 return true; 601 602 if (IsSimple) 603 return true; 604 605 if (!PreRegAlloc) 606 return true; 607 608 return canCompletelyDuplicateBB(TailBB); 609} 610 611/// isSimpleBB - True if this BB has only one unconditional jump. 612bool 613TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { 614 if (TailBB->succ_size() != 1) 615 return false; 616 if (TailBB->pred_empty()) 617 return false; 618 MachineBasicBlock::iterator I = TailBB->begin(); 619 MachineBasicBlock::iterator E = TailBB->end(); 620 while (I != E && I->isDebugValue()) 621 ++I; 622 if (I == E) 623 return true; 624 return I->isUnconditionalBranch(); 625} 626 627static bool 628bothUsedInPHI(const MachineBasicBlock &A, 629 SmallPtrSet<MachineBasicBlock*, 8> SuccsB) { 630 for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(), 631 SE = A.succ_end(); SI != SE; ++SI) { 632 MachineBasicBlock *BB = *SI; 633 if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) 634 return true; 635 } 636 637 return false; 638} 639 640bool 641TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { 642 SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end()); 643 644 for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), 645 PE = BB.pred_end(); PI != PE; ++PI) { 646 MachineBasicBlock *PredBB = *PI; 647 648 if (PredBB->succ_size() > 1) 649 return false; 650 651 MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; 652 SmallVector<MachineOperand, 4> PredCond; 653 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 654 return false; 655 656 if (!PredCond.empty()) 657 return false; 658 } 659 return true; 660} 661 662bool 663TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, 664 SmallVector<MachineBasicBlock*, 8> &TDBBs, 665 const DenseSet<unsigned> &UsedByPhi, 666 SmallVector<MachineInstr*, 16> &Copies) { 667 SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(), 668 TailBB->succ_end()); 669 SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), 670 TailBB->pred_end()); 671 bool Changed = false; 672 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 673 PE = Preds.end(); PI != PE; ++PI) { 674 MachineBasicBlock *PredBB = *PI; 675 676 if (PredBB->getLandingPadSuccessor()) 677 continue; 678 679 if (bothUsedInPHI(*PredBB, Succs)) 680 continue; 681 682 MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; 683 SmallVector<MachineOperand, 4> PredCond; 684 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 685 continue; 686 687 Changed = true; 688 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 689 << "From simple Succ: " << *TailBB); 690 691 MachineBasicBlock *NewTarget = *TailBB->succ_begin(); 692 MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB)); 693 694 // Make PredFBB explicit. 695 if (PredCond.empty()) 696 PredFBB = PredTBB; 697 698 // Make fall through explicit. 699 if (!PredTBB) 700 PredTBB = NextBB; 701 if (!PredFBB) 702 PredFBB = NextBB; 703 704 // Redirect 705 if (PredFBB == TailBB) 706 PredFBB = NewTarget; 707 if (PredTBB == TailBB) 708 PredTBB = NewTarget; 709 710 // Make the branch unconditional if possible 711 if (PredTBB == PredFBB) { 712 PredCond.clear(); 713 PredFBB = NULL; 714 } 715 716 // Avoid adding fall through branches. 717 if (PredFBB == NextBB) 718 PredFBB = NULL; 719 if (PredTBB == NextBB && PredFBB == NULL) 720 PredTBB = NULL; 721 722 TII->RemoveBranch(*PredBB); 723 724 if (PredTBB) 725 TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); 726 727 PredBB->removeSuccessor(TailBB); 728 unsigned NumSuccessors = PredBB->succ_size(); 729 assert(NumSuccessors <= 1); 730 if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget) 731 PredBB->addSuccessor(NewTarget); 732 733 TDBBs.push_back(PredBB); 734 } 735 return Changed; 736} 737 738/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each 739/// of its predecessors. 740bool 741TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, 742 bool IsSimple, 743 MachineFunction &MF, 744 SmallVector<MachineBasicBlock*, 8> &TDBBs, 745 SmallVector<MachineInstr*, 16> &Copies) { 746 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); 747 748 DenseSet<unsigned> UsedByPhi; 749 getRegsUsedByPHIs(*TailBB, &UsedByPhi); 750 751 if (IsSimple) 752 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); 753 754 // Iterate through all the unique predecessors and tail-duplicate this 755 // block into them, if possible. Copying the list ahead of time also 756 // avoids trouble with the predecessor list reallocating. 757 bool Changed = false; 758 SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), 759 TailBB->pred_end()); 760 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 761 PE = Preds.end(); PI != PE; ++PI) { 762 MachineBasicBlock *PredBB = *PI; 763 764 assert(TailBB != PredBB && 765 "Single-block loop should have been rejected earlier!"); 766 // EH edges are ignored by AnalyzeBranch. 767 if (PredBB->succ_size() > 1) 768 continue; 769 770 MachineBasicBlock *PredTBB, *PredFBB; 771 SmallVector<MachineOperand, 4> PredCond; 772 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 773 continue; 774 if (!PredCond.empty()) 775 continue; 776 // Don't duplicate into a fall-through predecessor (at least for now). 777 if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) 778 continue; 779 780 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 781 << "From Succ: " << *TailBB); 782 783 TDBBs.push_back(PredBB); 784 785 // Remove PredBB's unconditional branch. 786 TII->RemoveBranch(*PredBB); 787 788 if (RS && !TailBB->livein_empty()) { 789 // Update PredBB livein. 790 RS->enterBasicBlock(PredBB); 791 if (!PredBB->empty()) 792 RS->forward(prior(PredBB->end())); 793 BitVector RegsLiveAtExit(TRI->getNumRegs()); 794 RS->getRegsUsed(RegsLiveAtExit, false); 795 for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), 796 E = TailBB->livein_end(); I != E; ++I) { 797 if (!RegsLiveAtExit[*I]) 798 // If a register is previously livein to the tail but it's not live 799 // at the end of predecessor BB, then it should be added to its 800 // livein list. 801 PredBB->addLiveIn(*I); 802 } 803 } 804 805 // Clone the contents of TailBB into PredBB. 806 DenseMap<unsigned, unsigned> LocalVRMap; 807 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 808 // Use instr_iterator here to properly handle bundles, e.g. 809 // ARM Thumb2 IT block. 810 MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); 811 while (I != TailBB->instr_end()) { 812 MachineInstr *MI = &*I; 813 ++I; 814 if (MI->isPHI()) { 815 // Replace the uses of the def of the PHI with the register coming 816 // from PredBB. 817 ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); 818 } else { 819 // Replace def of virtual registers with new registers, and update 820 // uses with PHI source register or the new registers. 821 DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); 822 } 823 } 824 MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); 825 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 826 Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), 827 TII->get(TargetOpcode::COPY), 828 CopyInfos[i].first).addReg(CopyInfos[i].second)); 829 } 830 831 // Simplify 832 TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); 833 834 NumInstrDups += TailBB->size() - 1; // subtract one for removed branch 835 836 // Update the CFG. 837 PredBB->removeSuccessor(PredBB->succ_begin()); 838 assert(PredBB->succ_empty() && 839 "TailDuplicate called on block with multiple successors!"); 840 for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), 841 E = TailBB->succ_end(); I != E; ++I) 842 PredBB->addSuccessor(*I); 843 844 Changed = true; 845 ++NumTailDups; 846 } 847 848 // If TailBB was duplicated into all its predecessors except for the prior 849 // block, which falls through unconditionally, move the contents of this 850 // block into the prior block. 851 MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); 852 MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; 853 SmallVector<MachineOperand, 4> PriorCond; 854 // This has to check PrevBB->succ_size() because EH edges are ignored by 855 // AnalyzeBranch. 856 if (PrevBB->succ_size() == 1 && 857 !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && 858 PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && 859 !TailBB->hasAddressTaken()) { 860 DEBUG(dbgs() << "\nMerging into block: " << *PrevBB 861 << "From MBB: " << *TailBB); 862 if (PreRegAlloc) { 863 DenseMap<unsigned, unsigned> LocalVRMap; 864 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 865 MachineBasicBlock::iterator I = TailBB->begin(); 866 // Process PHI instructions first. 867 while (I != TailBB->end() && I->isPHI()) { 868 // Replace the uses of the def of the PHI with the register coming 869 // from PredBB. 870 MachineInstr *MI = &*I++; 871 ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); 872 if (MI->getParent()) 873 MI->eraseFromParent(); 874 } 875 876 // Now copy the non-PHI instructions. 877 while (I != TailBB->end()) { 878 // Replace def of virtual registers with new registers, and update 879 // uses with PHI source register or the new registers. 880 MachineInstr *MI = &*I++; 881 assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); 882 DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); 883 MI->eraseFromParent(); 884 } 885 MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); 886 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 887 Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), 888 TII->get(TargetOpcode::COPY), 889 CopyInfos[i].first) 890 .addReg(CopyInfos[i].second)); 891 } 892 } else { 893 // No PHIs to worry about, just splice the instructions over. 894 PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); 895 } 896 PrevBB->removeSuccessor(PrevBB->succ_begin()); 897 assert(PrevBB->succ_empty()); 898 PrevBB->transferSuccessors(TailBB); 899 TDBBs.push_back(PrevBB); 900 Changed = true; 901 } 902 903 // If this is after register allocation, there are no phis to fix. 904 if (!PreRegAlloc) 905 return Changed; 906 907 // If we made no changes so far, we are safe. 908 if (!Changed) 909 return Changed; 910 911 912 // Handle the nasty case in that we duplicated a block that is part of a loop 913 // into some but not all of its predecessors. For example: 914 // 1 -> 2 <-> 3 | 915 // \ | 916 // \---> rest | 917 // if we duplicate 2 into 1 but not into 3, we end up with 918 // 12 -> 3 <-> 2 -> rest | 919 // \ / | 920 // \----->-----/ | 921 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced 922 // with a phi in 3 (which now dominates 2). 923 // What we do here is introduce a copy in 3 of the register defined by the 924 // phi, just like when we are duplicating 2 into 3, but we don't copy any 925 // real instructions or remove the 3 -> 2 edge from the phi in 2. 926 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 927 PE = Preds.end(); PI != PE; ++PI) { 928 MachineBasicBlock *PredBB = *PI; 929 if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) 930 continue; 931 932 // EH edges 933 if (PredBB->succ_size() != 1) 934 continue; 935 936 DenseMap<unsigned, unsigned> LocalVRMap; 937 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 938 MachineBasicBlock::iterator I = TailBB->begin(); 939 // Process PHI instructions first. 940 while (I != TailBB->end() && I->isPHI()) { 941 // Replace the uses of the def of the PHI with the register coming 942 // from PredBB. 943 MachineInstr *MI = &*I++; 944 ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); 945 } 946 MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); 947 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 948 Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), 949 TII->get(TargetOpcode::COPY), 950 CopyInfos[i].first).addReg(CopyInfos[i].second)); 951 } 952 } 953 954 return Changed; 955} 956 957/// RemoveDeadBlock - Remove the specified dead machine basic block from the 958/// function, updating the CFG. 959void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { 960 assert(MBB->pred_empty() && "MBB must be dead!"); 961 DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); 962 963 // Remove all successors. 964 while (!MBB->succ_empty()) 965 MBB->removeSuccessor(MBB->succ_end()-1); 966 967 // Remove the block. 968 MBB->eraseFromParent(); 969} 970