JumpThreading.cpp revision 6b233395025069f63156ea2b524cdb708a14731f
1//===- JumpThreading.cpp - Thread control through conditional blocks ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the Jump Threading pass. 11// 12//===----------------------------------------------------------------------===// 13 14#define DEBUG_TYPE "jump-threading" 15#include "llvm/Transforms/Scalar.h" 16#include "llvm/IntrinsicInst.h" 17#include "llvm/Pass.h" 18#include "llvm/ADT/DenseMap.h" 19#include "llvm/ADT/Statistic.h" 20#include "llvm/ADT/STLExtras.h" 21#include "llvm/Analysis/ConstantFolding.h" 22#include "llvm/Transforms/Utils/BasicBlockUtils.h" 23#include "llvm/Transforms/Utils/Local.h" 24#include "llvm/Target/TargetData.h" 25#include "llvm/Support/CommandLine.h" 26#include "llvm/Support/Compiler.h" 27#include "llvm/Support/Debug.h" 28#include "llvm/ADT/SmallPtrSet.h" 29using namespace llvm; 30 31STATISTIC(NumThreads, "Number of jumps threaded"); 32STATISTIC(NumFolds, "Number of terminators folded"); 33 34static cl::opt<unsigned> 35Threshold("jump-threading-threshold", 36 cl::desc("Max block size to duplicate for jump threading"), 37 cl::init(6), cl::Hidden); 38 39namespace { 40 /// This pass performs 'jump threading', which looks at blocks that have 41 /// multiple predecessors and multiple successors. If one or more of the 42 /// predecessors of the block can be proven to always jump to one of the 43 /// successors, we forward the edge from the predecessor to the successor by 44 /// duplicating the contents of this block. 45 /// 46 /// An example of when this can occur is code like this: 47 /// 48 /// if () { ... 49 /// X = 4; 50 /// } 51 /// if (X < 3) { 52 /// 53 /// In this case, the unconditional branch at the end of the first if can be 54 /// revectored to the false side of the second if. 55 /// 56 class VISIBILITY_HIDDEN JumpThreading : public FunctionPass { 57 TargetData *TD; 58 public: 59 static char ID; // Pass identification 60 JumpThreading() : FunctionPass(&ID) {} 61 62 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 63 AU.addRequired<TargetData>(); 64 } 65 66 bool runOnFunction(Function &F); 67 bool ProcessBlock(BasicBlock *BB); 68 void ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB); 69 BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal); 70 bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); 71 bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); 72 73 bool ProcessJumpOnPHI(PHINode *PN); 74 bool ProcessBranchOnLogical(Value *V, BasicBlock *BB, bool isAnd); 75 bool ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB); 76 77 bool SimplifyPartiallyRedundantLoad(LoadInst *LI); 78 }; 79} 80 81char JumpThreading::ID = 0; 82static RegisterPass<JumpThreading> 83X("jump-threading", "Jump Threading"); 84 85// Public interface to the Jump Threading pass 86FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } 87 88/// runOnFunction - Top level algorithm. 89/// 90bool JumpThreading::runOnFunction(Function &F) { 91 DOUT << "Jump threading on function '" << F.getNameStart() << "'\n"; 92 TD = &getAnalysis<TargetData>(); 93 94 bool AnotherIteration = true, EverChanged = false; 95 while (AnotherIteration) { 96 AnotherIteration = false; 97 bool Changed = false; 98 for (Function::iterator I = F.begin(), E = F.end(); I != E;) { 99 BasicBlock *BB = I; 100 while (ProcessBlock(BB)) 101 Changed = true; 102 103 ++I; 104 105 // If the block is trivially dead, zap it. This eliminates the successor 106 // edges which simplifies the CFG. 107 if (pred_begin(BB) == pred_end(BB) && 108 BB != &BB->getParent()->getEntryBlock()) { 109 DOUT << " JT: Deleting dead block '" << BB->getNameStart() 110 << "' with terminator: " << *BB->getTerminator(); 111 DeleteDeadBlock(BB); 112 Changed = true; 113 } 114 } 115 AnotherIteration = Changed; 116 EverChanged |= Changed; 117 } 118 return EverChanged; 119} 120 121/// FactorCommonPHIPreds - If there are multiple preds with the same incoming 122/// value for the PHI, factor them together so we get one block to thread for 123/// the whole group. 124/// This is important for things like "phi i1 [true, true, false, true, x]" 125/// where we only need to clone the block for the true blocks once. 126/// 127BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) { 128 SmallVector<BasicBlock*, 16> CommonPreds; 129 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 130 if (PN->getIncomingValue(i) == CstVal) 131 CommonPreds.push_back(PN->getIncomingBlock(i)); 132 133 if (CommonPreds.size() == 1) 134 return CommonPreds[0]; 135 136 DOUT << " Factoring out " << CommonPreds.size() 137 << " common predecessors.\n"; 138 return SplitBlockPredecessors(PN->getParent(), 139 &CommonPreds[0], CommonPreds.size(), 140 ".thr_comm", this); 141} 142 143 144/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to 145/// thread across it. 146static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { 147 /// Ignore PHI nodes, these will be flattened when duplication happens. 148 BasicBlock::const_iterator I = BB->getFirstNonPHI(); 149 150 // Sum up the cost of each instruction until we get to the terminator. Don't 151 // include the terminator because the copy won't include it. 152 unsigned Size = 0; 153 for (; !isa<TerminatorInst>(I); ++I) { 154 // Debugger intrinsics don't incur code size. 155 if (isa<DbgInfoIntrinsic>(I)) continue; 156 157 // If this is a pointer->pointer bitcast, it is free. 158 if (isa<BitCastInst>(I) && isa<PointerType>(I->getType())) 159 continue; 160 161 // All other instructions count for at least one unit. 162 ++Size; 163 164 // Calls are more expensive. If they are non-intrinsic calls, we model them 165 // as having cost of 4. If they are a non-vector intrinsic, we model them 166 // as having cost of 2 total, and if they are a vector intrinsic, we model 167 // them as having cost 1. 168 if (const CallInst *CI = dyn_cast<CallInst>(I)) { 169 if (!isa<IntrinsicInst>(CI)) 170 Size += 3; 171 else if (isa<VectorType>(CI->getType())) 172 Size += 1; 173 } 174 } 175 176 // Threading through a switch statement is particularly profitable. If this 177 // block ends in a switch, decrease its cost to make it more likely to happen. 178 if (isa<SwitchInst>(I)) 179 Size = Size > 6 ? Size-6 : 0; 180 181 return Size; 182} 183 184/// ProcessBlock - If there are any predecessors whose control can be threaded 185/// through to a successor, transform them now. 186bool JumpThreading::ProcessBlock(BasicBlock *BB) { 187 // If this block has a single predecessor, and if that pred has a single 188 // successor, merge the blocks. This encourages recursive jump threading 189 // because now the condition in this block can be threaded through 190 // predecessors of our predecessor block. 191 if (BasicBlock *SinglePred = BB->getSinglePredecessor()) 192 if (SinglePred->getTerminator()->getNumSuccessors() == 1 && 193 SinglePred != BB) { 194 // Remember if SinglePred was the entry block of the function. If so, we 195 // will need to move BB back to the entry position. 196 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); 197 MergeBasicBlockIntoOnlyPred(BB); 198 199 if (isEntry && BB != &BB->getParent()->getEntryBlock()) 200 BB->moveBefore(&BB->getParent()->getEntryBlock()); 201 return true; 202 } 203 204 // See if this block ends with a branch or switch. If so, see if the 205 // condition is a phi node. If so, and if an entry of the phi node is a 206 // constant, we can thread the block. 207 Value *Condition; 208 if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { 209 // Can't thread an unconditional jump. 210 if (BI->isUnconditional()) return false; 211 Condition = BI->getCondition(); 212 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) 213 Condition = SI->getCondition(); 214 else 215 return false; // Must be an invoke. 216 217 // If the terminator of this block is branching on a constant, simplify the 218 // terminator to an unconditional branch. This can occur due to threading in 219 // other blocks. 220 if (isa<ConstantInt>(Condition)) { 221 DOUT << " In block '" << BB->getNameStart() 222 << "' folding terminator: " << *BB->getTerminator(); 223 ++NumFolds; 224 ConstantFoldTerminator(BB); 225 return true; 226 } 227 228 // If the terminator is branching on an undef, we can pick any of the 229 // successors to branch to. Since this is arbitrary, we pick the successor 230 // with the fewest predecessors. This should reduce the in-degree of the 231 // others. 232 if (isa<UndefValue>(Condition)) { 233 TerminatorInst *BBTerm = BB->getTerminator(); 234 unsigned MinSucc = 0; 235 BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); 236 // Compute the successor with the minimum number of predecessors. 237 unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); 238 for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { 239 TestBB = BBTerm->getSuccessor(i); 240 unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); 241 if (NumPreds < MinNumPreds) 242 MinSucc = i; 243 } 244 245 // Fold the branch/switch. 246 for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { 247 if (i == MinSucc) continue; 248 BBTerm->getSuccessor(i)->removePredecessor(BB); 249 } 250 251 DOUT << " In block '" << BB->getNameStart() 252 << "' folding undef terminator: " << *BBTerm; 253 BranchInst::Create(BBTerm->getSuccessor(MinSucc), BBTerm); 254 BBTerm->eraseFromParent(); 255 return true; 256 } 257 258 Instruction *CondInst = dyn_cast<Instruction>(Condition); 259 260 // If the condition is an instruction defined in another block, see if a 261 // predecessor has the same condition: 262 // br COND, BBX, BBY 263 // BBX: 264 // br COND, BBZ, BBW 265 if (!Condition->hasOneUse() && // Multiple uses. 266 (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition. 267 pred_iterator PI = pred_begin(BB), E = pred_end(BB); 268 if (isa<BranchInst>(BB->getTerminator())) { 269 for (; PI != E; ++PI) 270 if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) 271 if (PBI->isConditional() && PBI->getCondition() == Condition && 272 ProcessBranchOnDuplicateCond(*PI, BB)) 273 return true; 274 } else { 275 assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator"); 276 for (; PI != E; ++PI) 277 if (SwitchInst *PSI = dyn_cast<SwitchInst>((*PI)->getTerminator())) 278 if (PSI->getCondition() == Condition && 279 ProcessSwitchOnDuplicateCond(*PI, BB)) 280 return true; 281 } 282 } 283 284 // If there is only a single predecessor of this block, nothing to fold. 285 if (BB->getSinglePredecessor()) 286 return false; 287 288 // All the rest of our checks depend on the condition being an instruction. 289 if (CondInst == 0) 290 return false; 291 292 // See if this is a phi node in the current block. 293 if (PHINode *PN = dyn_cast<PHINode>(CondInst)) 294 if (PN->getParent() == BB) 295 return ProcessJumpOnPHI(PN); 296 297 // If this is a conditional branch whose condition is and/or of a phi, try to 298 // simplify it. 299 if ((CondInst->getOpcode() == Instruction::And || 300 CondInst->getOpcode() == Instruction::Or) && 301 isa<BranchInst>(BB->getTerminator()) && 302 ProcessBranchOnLogical(CondInst, BB, 303 CondInst->getOpcode() == Instruction::And)) 304 return true; 305 306 // If we have "br (phi != 42)" and the phi node has any constant values as 307 // operands, we can thread through this block. 308 if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) 309 if (isa<PHINode>(CondCmp->getOperand(0)) && 310 isa<Constant>(CondCmp->getOperand(1)) && 311 ProcessBranchOnCompare(CondCmp, BB)) 312 return true; 313 314 // Check for some cases that are worth simplifying. Right now we want to look 315 // for loads that are used by a switch or by the condition for the branch. If 316 // we see one, check to see if it's partially redundant. If so, insert a PHI 317 // which can then be used to thread the values. 318 // 319 // This is particularly important because reg2mem inserts loads and stores all 320 // over the place, and this blocks jump threading if we don't zap them. 321 Value *SimplifyValue = CondInst; 322 if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue)) 323 if (isa<Constant>(CondCmp->getOperand(1))) 324 SimplifyValue = CondCmp->getOperand(0); 325 326 if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue)) 327 if (SimplifyPartiallyRedundantLoad(LI)) 328 return true; 329 330 // TODO: If we have: "br (X > 0)" and we have a predecessor where we know 331 // "(X == 4)" thread through this block. 332 333 return false; 334} 335 336/// ProcessBranchOnDuplicateCond - We found a block and a predecessor of that 337/// block that jump on exactly the same condition. This means that we almost 338/// always know the direction of the edge in the DESTBB: 339/// PREDBB: 340/// br COND, DESTBB, BBY 341/// DESTBB: 342/// br COND, BBZ, BBW 343/// 344/// If DESTBB has multiple predecessors, we can't just constant fold the branch 345/// in DESTBB, we have to thread over it. 346bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, 347 BasicBlock *BB) { 348 BranchInst *PredBI = cast<BranchInst>(PredBB->getTerminator()); 349 350 // If both successors of PredBB go to DESTBB, we don't know anything. We can 351 // fold the branch to an unconditional one, which allows other recursive 352 // simplifications. 353 bool BranchDir; 354 if (PredBI->getSuccessor(1) != BB) 355 BranchDir = true; 356 else if (PredBI->getSuccessor(0) != BB) 357 BranchDir = false; 358 else { 359 DOUT << " In block '" << PredBB->getNameStart() 360 << "' folding terminator: " << *PredBB->getTerminator(); 361 ++NumFolds; 362 ConstantFoldTerminator(PredBB); 363 return true; 364 } 365 366 BranchInst *DestBI = cast<BranchInst>(BB->getTerminator()); 367 368 // If the dest block has one predecessor, just fix the branch condition to a 369 // constant and fold it. 370 if (BB->getSinglePredecessor()) { 371 DOUT << " In block '" << BB->getNameStart() 372 << "' folding condition to '" << BranchDir << "': " 373 << *BB->getTerminator(); 374 ++NumFolds; 375 DestBI->setCondition(ConstantInt::get(Type::Int1Ty, BranchDir)); 376 ConstantFoldTerminator(BB); 377 return true; 378 } 379 380 // Otherwise we need to thread from PredBB to DestBB's successor which 381 // involves code duplication. Check to see if it is worth it. 382 unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); 383 if (JumpThreadCost > Threshold) { 384 DOUT << " Not threading BB '" << BB->getNameStart() 385 << "' - Cost is too high: " << JumpThreadCost << "\n"; 386 return false; 387 } 388 389 // Next, figure out which successor we are threading to. 390 BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir); 391 392 // If threading to the same block as we come from, we would infinite loop. 393 if (SuccBB == BB) { 394 DOUT << " Not threading BB '" << BB->getNameStart() 395 << "' - would thread to self!\n"; 396 return false; 397 } 398 399 // And finally, do it! 400 DOUT << " Threading edge from '" << PredBB->getNameStart() << "' to '" 401 << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost 402 << ", across block:\n " 403 << *BB << "\n"; 404 405 ThreadEdge(BB, PredBB, SuccBB); 406 ++NumThreads; 407 return true; 408} 409 410/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that 411/// block that switch on exactly the same condition. This means that we almost 412/// always know the direction of the edge in the DESTBB: 413/// PREDBB: 414/// switch COND [... DESTBB, BBY ... ] 415/// DESTBB: 416/// switch COND [... BBZ, BBW ] 417/// 418/// Optimizing switches like this is very important, because simplifycfg builds 419/// switches out of repeated 'if' conditions. 420bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, 421 BasicBlock *DestBB) { 422 // Can't thread edge to self. 423 if (PredBB == DestBB) 424 return false; 425 426 427 SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator()); 428 SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator()); 429 430 // There are a variety of optimizations that we can potentially do on these 431 // blocks: we order them from most to least preferable. 432 433 // If DESTBB *just* contains the switch, then we can forward edges from PREDBB 434 // directly to their destination. This does not introduce *any* code size 435 // growth. Skip debug info first. 436 BasicBlock::iterator BBI = DestBB->begin(); 437 while (isa<DbgInfoIntrinsic>(BBI)) 438 BBI++; 439 440 // FIXME: Thread if it just contains a PHI. 441 if (isa<SwitchInst>(BBI)) { 442 bool MadeChange = false; 443 // Ignore the default edge for now. 444 for (unsigned i = 1, e = DestSI->getNumSuccessors(); i != e; ++i) { 445 ConstantInt *DestVal = DestSI->getCaseValue(i); 446 BasicBlock *DestSucc = DestSI->getSuccessor(i); 447 448 // Okay, DestSI has a case for 'DestVal' that goes to 'DestSucc'. See if 449 // PredSI has an explicit case for it. If so, forward. If it is covered 450 // by the default case, we can't update PredSI. 451 unsigned PredCase = PredSI->findCaseValue(DestVal); 452 if (PredCase == 0) continue; 453 454 // If PredSI doesn't go to DestBB on this value, then it won't reach the 455 // case on this condition. 456 if (PredSI->getSuccessor(PredCase) != DestBB && 457 DestSI->getSuccessor(i) != DestBB) 458 continue; 459 460 // Otherwise, we're safe to make the change. Make sure that the edge from 461 // DestSI to DestSucc is not critical and has no PHI nodes. 462 DOUT << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI; 463 DOUT << "THROUGH: " << *DestSI; 464 465 // If the destination has PHI nodes, just split the edge for updating 466 // simplicity. 467 if (isa<PHINode>(DestSucc->begin()) && !DestSucc->getSinglePredecessor()){ 468 SplitCriticalEdge(DestSI, i, this); 469 DestSucc = DestSI->getSuccessor(i); 470 } 471 FoldSingleEntryPHINodes(DestSucc); 472 PredSI->setSuccessor(PredCase, DestSucc); 473 MadeChange = true; 474 } 475 476 if (MadeChange) 477 return true; 478 } 479 480 return false; 481} 482 483 484/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant 485/// load instruction, eliminate it by replacing it with a PHI node. This is an 486/// important optimization that encourages jump threading, and needs to be run 487/// interlaced with other jump threading tasks. 488bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { 489 // Don't hack volatile loads. 490 if (LI->isVolatile()) return false; 491 492 // If the load is defined in a block with exactly one predecessor, it can't be 493 // partially redundant. 494 BasicBlock *LoadBB = LI->getParent(); 495 if (LoadBB->getSinglePredecessor()) 496 return false; 497 498 Value *LoadedPtr = LI->getOperand(0); 499 500 // If the loaded operand is defined in the LoadBB, it can't be available. 501 // FIXME: Could do PHI translation, that would be fun :) 502 if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr)) 503 if (PtrOp->getParent() == LoadBB) 504 return false; 505 506 // Scan a few instructions up from the load, to see if it is obviously live at 507 // the entry to its block. 508 BasicBlock::iterator BBIt = LI; 509 510 if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB, 511 BBIt, 6)) { 512 // If the value if the load is locally available within the block, just use 513 // it. This frequently occurs for reg2mem'd allocas. 514 //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n"; 515 516 // If the returned value is the load itself, replace with an undef. This can 517 // only happen in dead loops. 518 if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType()); 519 LI->replaceAllUsesWith(AvailableVal); 520 LI->eraseFromParent(); 521 return true; 522 } 523 524 // Otherwise, if we scanned the whole block and got to the top of the block, 525 // we know the block is locally transparent to the load. If not, something 526 // might clobber its value. 527 if (BBIt != LoadBB->begin()) 528 return false; 529 530 531 SmallPtrSet<BasicBlock*, 8> PredsScanned; 532 typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy; 533 AvailablePredsTy AvailablePreds; 534 BasicBlock *OneUnavailablePred = 0; 535 536 // If we got here, the loaded value is transparent through to the start of the 537 // block. Check to see if it is available in any of the predecessor blocks. 538 for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); 539 PI != PE; ++PI) { 540 BasicBlock *PredBB = *PI; 541 542 // If we already scanned this predecessor, skip it. 543 if (!PredsScanned.insert(PredBB)) 544 continue; 545 546 // Scan the predecessor to see if the value is available in the pred. 547 BBIt = PredBB->end(); 548 Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6); 549 if (!PredAvailable) { 550 OneUnavailablePred = PredBB; 551 continue; 552 } 553 554 // If so, this load is partially redundant. Remember this info so that we 555 // can create a PHI node. 556 AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable)); 557 } 558 559 // If the loaded value isn't available in any predecessor, it isn't partially 560 // redundant. 561 if (AvailablePreds.empty()) return false; 562 563 // Okay, the loaded value is available in at least one (and maybe all!) 564 // predecessors. If the value is unavailable in more than one unique 565 // predecessor, we want to insert a merge block for those common predecessors. 566 // This ensures that we only have to insert one reload, thus not increasing 567 // code size. 568 BasicBlock *UnavailablePred = 0; 569 570 // If there is exactly one predecessor where the value is unavailable, the 571 // already computed 'OneUnavailablePred' block is it. If it ends in an 572 // unconditional branch, we know that it isn't a critical edge. 573 if (PredsScanned.size() == AvailablePreds.size()+1 && 574 OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) { 575 UnavailablePred = OneUnavailablePred; 576 } else if (PredsScanned.size() != AvailablePreds.size()) { 577 // Otherwise, we had multiple unavailable predecessors or we had a critical 578 // edge from the one. 579 SmallVector<BasicBlock*, 8> PredsToSplit; 580 SmallPtrSet<BasicBlock*, 8> AvailablePredSet; 581 582 for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i) 583 AvailablePredSet.insert(AvailablePreds[i].first); 584 585 // Add all the unavailable predecessors to the PredsToSplit list. 586 for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); 587 PI != PE; ++PI) 588 if (!AvailablePredSet.count(*PI)) 589 PredsToSplit.push_back(*PI); 590 591 // Split them out to their own block. 592 UnavailablePred = 593 SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(), 594 "thread-split", this); 595 } 596 597 // If the value isn't available in all predecessors, then there will be 598 // exactly one where it isn't available. Insert a load on that edge and add 599 // it to the AvailablePreds list. 600 if (UnavailablePred) { 601 assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 && 602 "Can't handle critical edge here!"); 603 Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", 604 UnavailablePred->getTerminator()); 605 AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); 606 } 607 608 // Now we know that each predecessor of this block has a value in 609 // AvailablePreds, sort them for efficient access as we're walking the preds. 610 array_pod_sort(AvailablePreds.begin(), AvailablePreds.end()); 611 612 // Create a PHI node at the start of the block for the PRE'd load value. 613 PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin()); 614 PN->takeName(LI); 615 616 // Insert new entries into the PHI for each predecessor. A single block may 617 // have multiple entries here. 618 for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; 619 ++PI) { 620 AvailablePredsTy::iterator I = 621 std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), 622 std::make_pair(*PI, (Value*)0)); 623 624 assert(I != AvailablePreds.end() && I->first == *PI && 625 "Didn't find entry for predecessor!"); 626 627 PN->addIncoming(I->second, I->first); 628 } 629 630 //cerr << "PRE: " << *LI << *PN << "\n"; 631 632 LI->replaceAllUsesWith(PN); 633 LI->eraseFromParent(); 634 635 return true; 636} 637 638 639/// ProcessJumpOnPHI - We have a conditional branch of switch on a PHI node in 640/// the current block. See if there are any simplifications we can do based on 641/// inputs to the phi node. 642/// 643bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { 644 // See if the phi node has any constant values. If so, we can determine where 645 // the corresponding predecessor will branch. 646 ConstantInt *PredCst = 0; 647 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 648 if ((PredCst = dyn_cast<ConstantInt>(PN->getIncomingValue(i)))) 649 break; 650 651 // If no incoming value has a constant, we don't know the destination of any 652 // predecessors. 653 if (PredCst == 0) 654 return false; 655 656 // See if the cost of duplicating this block is low enough. 657 BasicBlock *BB = PN->getParent(); 658 unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); 659 if (JumpThreadCost > Threshold) { 660 DOUT << " Not threading BB '" << BB->getNameStart() 661 << "' - Cost is too high: " << JumpThreadCost << "\n"; 662 return false; 663 } 664 665 // If so, we can actually do this threading. Merge any common predecessors 666 // that will act the same. 667 BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); 668 669 // Next, figure out which successor we are threading to. 670 BasicBlock *SuccBB; 671 if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) 672 SuccBB = BI->getSuccessor(PredCst == ConstantInt::getFalse()); 673 else { 674 SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); 675 SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst)); 676 } 677 678 // If threading to the same block as we come from, we would infinite loop. 679 if (SuccBB == BB) { 680 DOUT << " Not threading BB '" << BB->getNameStart() 681 << "' - would thread to self!\n"; 682 return false; 683 } 684 685 // And finally, do it! 686 DOUT << " Threading edge from '" << PredBB->getNameStart() << "' to '" 687 << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost 688 << ", across block:\n " 689 << *BB << "\n"; 690 691 ThreadEdge(BB, PredBB, SuccBB); 692 ++NumThreads; 693 return true; 694} 695 696/// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch 697/// whose condition is an AND/OR where one side is PN. If PN has constant 698/// operands that permit us to evaluate the condition for some operand, thread 699/// through the block. For example with: 700/// br (and X, phi(Y, Z, false)) 701/// the predecessor corresponding to the 'false' will always jump to the false 702/// destination of the branch. 703/// 704bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, 705 bool isAnd) { 706 // If this is a binary operator tree of the same AND/OR opcode, check the 707 // LHS/RHS. 708 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) 709 if ((isAnd && BO->getOpcode() == Instruction::And) || 710 (!isAnd && BO->getOpcode() == Instruction::Or)) { 711 if (ProcessBranchOnLogical(BO->getOperand(0), BB, isAnd)) 712 return true; 713 if (ProcessBranchOnLogical(BO->getOperand(1), BB, isAnd)) 714 return true; 715 } 716 717 // If this isn't a PHI node, we can't handle it. 718 PHINode *PN = dyn_cast<PHINode>(V); 719 if (!PN || PN->getParent() != BB) return false; 720 721 // We can only do the simplification for phi nodes of 'false' with AND or 722 // 'true' with OR. See if we have any entries in the phi for this. 723 unsigned PredNo = ~0U; 724 ConstantInt *PredCst = ConstantInt::get(Type::Int1Ty, !isAnd); 725 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 726 if (PN->getIncomingValue(i) == PredCst) { 727 PredNo = i; 728 break; 729 } 730 } 731 732 // If no match, bail out. 733 if (PredNo == ~0U) 734 return false; 735 736 // See if the cost of duplicating this block is low enough. 737 unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); 738 if (JumpThreadCost > Threshold) { 739 DOUT << " Not threading BB '" << BB->getNameStart() 740 << "' - Cost is too high: " << JumpThreadCost << "\n"; 741 return false; 742 } 743 744 // If so, we can actually do this threading. Merge any common predecessors 745 // that will act the same. 746 BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); 747 748 // Next, figure out which successor we are threading to. If this was an AND, 749 // the constant must be FALSE, and we must be targeting the 'false' block. 750 // If this is an OR, the constant must be TRUE, and we must be targeting the 751 // 'true' block. 752 BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd); 753 754 // If threading to the same block as we come from, we would infinite loop. 755 if (SuccBB == BB) { 756 DOUT << " Not threading BB '" << BB->getNameStart() 757 << "' - would thread to self!\n"; 758 return false; 759 } 760 761 // And finally, do it! 762 DOUT << " Threading edge through bool from '" << PredBB->getNameStart() 763 << "' to '" << SuccBB->getNameStart() << "' with cost: " 764 << JumpThreadCost << ", across block:\n " 765 << *BB << "\n"; 766 767 ThreadEdge(BB, PredBB, SuccBB); 768 ++NumThreads; 769 return true; 770} 771 772/// ProcessBranchOnCompare - We found a branch on a comparison between a phi 773/// node and a constant. If the PHI node contains any constants as inputs, we 774/// can fold the compare for that edge and thread through it. 775bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { 776 PHINode *PN = cast<PHINode>(Cmp->getOperand(0)); 777 Constant *RHS = cast<Constant>(Cmp->getOperand(1)); 778 779 // If the phi isn't in the current block, an incoming edge to this block 780 // doesn't control the destination. 781 if (PN->getParent() != BB) 782 return false; 783 784 // We can do this simplification if any comparisons fold to true or false. 785 // See if any do. 786 Constant *PredCst = 0; 787 bool TrueDirection = false; 788 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 789 PredCst = dyn_cast<Constant>(PN->getIncomingValue(i)); 790 if (PredCst == 0) continue; 791 792 Constant *Res; 793 if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cmp)) 794 Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS); 795 else 796 Res = ConstantExpr::getFCmp(cast<FCmpInst>(Cmp)->getPredicate(), 797 PredCst, RHS); 798 // If this folded to a constant expr, we can't do anything. 799 if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) { 800 TrueDirection = ResC->getZExtValue(); 801 break; 802 } 803 // If this folded to undef, just go the false way. 804 if (isa<UndefValue>(Res)) { 805 TrueDirection = false; 806 break; 807 } 808 809 // Otherwise, we can't fold this input. 810 PredCst = 0; 811 } 812 813 // If no match, bail out. 814 if (PredCst == 0) 815 return false; 816 817 // See if the cost of duplicating this block is low enough. 818 unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); 819 if (JumpThreadCost > Threshold) { 820 DOUT << " Not threading BB '" << BB->getNameStart() 821 << "' - Cost is too high: " << JumpThreadCost << "\n"; 822 return false; 823 } 824 825 // If so, we can actually do this threading. Merge any common predecessors 826 // that will act the same. 827 BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); 828 829 // Next, get our successor. 830 BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection); 831 832 // If threading to the same block as we come from, we would infinite loop. 833 if (SuccBB == BB) { 834 DOUT << " Not threading BB '" << BB->getNameStart() 835 << "' - would thread to self!\n"; 836 return false; 837 } 838 839 840 // And finally, do it! 841 DOUT << " Threading edge through bool from '" << PredBB->getNameStart() 842 << "' to '" << SuccBB->getNameStart() << "' with cost: " 843 << JumpThreadCost << ", across block:\n " 844 << *BB << "\n"; 845 846 ThreadEdge(BB, PredBB, SuccBB); 847 ++NumThreads; 848 return true; 849} 850 851 852/// ThreadEdge - We have decided that it is safe and profitable to thread an 853/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this 854/// change. 855void JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, 856 BasicBlock *SuccBB) { 857 858 // Jump Threading can not update SSA properties correctly if the values 859 // defined in the duplicated block are used outside of the block itself. For 860 // this reason, we spill all values that are used outside of BB to the stack. 861 for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { 862 if (!I->isUsedOutsideOfBlock(BB)) 863 continue; 864 865 // We found a use of I outside of BB. Create a new stack slot to 866 // break this inter-block usage pattern. 867 DemoteRegToStack(*I); 868 } 869 870 // We are going to have to map operands from the original BB block to the new 871 // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to 872 // account for entry from PredBB. 873 DenseMap<Instruction*, Value*> ValueMapping; 874 875 BasicBlock *NewBB = 876 BasicBlock::Create(BB->getName()+".thread", BB->getParent(), BB); 877 NewBB->moveAfter(PredBB); 878 879 BasicBlock::iterator BI = BB->begin(); 880 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) 881 ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); 882 883 // Clone the non-phi instructions of BB into NewBB, keeping track of the 884 // mapping and using it to remap operands in the cloned instructions. 885 for (; !isa<TerminatorInst>(BI); ++BI) { 886 Instruction *New = BI->clone(); 887 New->setName(BI->getNameStart()); 888 NewBB->getInstList().push_back(New); 889 ValueMapping[BI] = New; 890 891 // Remap operands to patch up intra-block references. 892 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) 893 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) 894 if (Value *Remapped = ValueMapping[Inst]) 895 New->setOperand(i, Remapped); 896 } 897 898 // We didn't copy the terminator from BB over to NewBB, because there is now 899 // an unconditional jump to SuccBB. Insert the unconditional jump. 900 BranchInst::Create(SuccBB, NewBB); 901 902 // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the 903 // PHI nodes for NewBB now. 904 for (BasicBlock::iterator PNI = SuccBB->begin(); isa<PHINode>(PNI); ++PNI) { 905 PHINode *PN = cast<PHINode>(PNI); 906 // Ok, we have a PHI node. Figure out what the incoming value was for the 907 // DestBlock. 908 Value *IV = PN->getIncomingValueForBlock(BB); 909 910 // Remap the value if necessary. 911 if (Instruction *Inst = dyn_cast<Instruction>(IV)) 912 if (Value *MappedIV = ValueMapping[Inst]) 913 IV = MappedIV; 914 PN->addIncoming(IV, NewBB); 915 } 916 917 // Ok, NewBB is good to go. Update the terminator of PredBB to jump to 918 // NewBB instead of BB. This eliminates predecessors from BB, which requires 919 // us to simplify any PHI nodes in BB. 920 TerminatorInst *PredTerm = PredBB->getTerminator(); 921 for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) 922 if (PredTerm->getSuccessor(i) == BB) { 923 BB->removePredecessor(PredBB); 924 PredTerm->setSuccessor(i, NewBB); 925 } 926 927 // At this point, the IR is fully up to date and consistent. Do a quick scan 928 // over the new instructions and zap any that are constants or dead. This 929 // frequently happens because of phi translation. 930 BI = NewBB->begin(); 931 for (BasicBlock::iterator E = NewBB->end(); BI != E; ) { 932 Instruction *Inst = BI++; 933 if (Constant *C = ConstantFoldInstruction(Inst, TD)) { 934 Inst->replaceAllUsesWith(C); 935 Inst->eraseFromParent(); 936 continue; 937 } 938 939 RecursivelyDeleteTriviallyDeadInstructions(Inst); 940 } 941} 942