JumpThreading.cpp revision 69e067fdd86d34cb81ccdffb82415b4f89144218
1//===- JumpThreading.cpp - Thread control through conditional blocks ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the Jump Threading pass. 11// 12//===----------------------------------------------------------------------===// 13 14#define DEBUG_TYPE "jump-threading" 15#include "llvm/Transforms/Scalar.h" 16#include "llvm/IntrinsicInst.h" 17#include "llvm/Pass.h" 18#include "llvm/ADT/DenseMap.h" 19#include "llvm/ADT/Statistic.h" 20#include "llvm/Transforms/Utils/BasicBlockUtils.h" 21#include "llvm/Transforms/Utils/Local.h" 22#include "llvm/Support/CommandLine.h" 23#include "llvm/Support/Compiler.h" 24#include "llvm/Support/Debug.h" 25#include "llvm/ADT/SmallPtrSet.h" 26using namespace llvm; 27 28STATISTIC(NumThreads, "Number of jumps threaded"); 29STATISTIC(NumFolds, "Number of terminators folded"); 30 31static cl::opt<unsigned> 32Threshold("jump-threading-threshold", 33 cl::desc("Max block size to duplicate for jump threading"), 34 cl::init(6), cl::Hidden); 35 36namespace { 37 /// This pass performs 'jump threading', which looks at blocks that have 38 /// multiple predecessors and multiple successors. If one or more of the 39 /// predecessors of the block can be proven to always jump to one of the 40 /// successors, we forward the edge from the predecessor to the successor by 41 /// duplicating the contents of this block. 42 /// 43 /// An example of when this can occur is code like this: 44 /// 45 /// if () { ... 46 /// X = 4; 47 /// } 48 /// if (X < 3) { 49 /// 50 /// In this case, the unconditional branch at the end of the first if can be 51 /// revectored to the false side of the second if. 52 /// 53 class VISIBILITY_HIDDEN JumpThreading : public FunctionPass { 54 public: 55 static char ID; // Pass identification 56 JumpThreading() : FunctionPass(&ID) {} 57 58 bool runOnFunction(Function &F); 59 bool ThreadBlock(BasicBlock *BB); 60 void ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB); 61 BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal); 62 63 bool ProcessJumpOnPHI(PHINode *PN); 64 bool ProcessBranchOnLogical(Value *V, BasicBlock *BB, bool isAnd); 65 bool ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB); 66 67 bool SimplifyPartiallyRedundantLoad(LoadInst *LI); 68 }; 69} 70 71char JumpThreading::ID = 0; 72static RegisterPass<JumpThreading> 73X("jump-threading", "Jump Threading"); 74 75// Public interface to the Jump Threading pass 76FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } 77 78/// runOnFunction - Top level algorithm. 79/// 80bool JumpThreading::runOnFunction(Function &F) { 81 DOUT << "Jump threading on function '" << F.getNameStart() << "'\n"; 82 83 bool AnotherIteration = true, EverChanged = false; 84 while (AnotherIteration) { 85 AnotherIteration = false; 86 bool Changed = false; 87 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) 88 while (ThreadBlock(I)) 89 Changed = true; 90 AnotherIteration = Changed; 91 EverChanged |= Changed; 92 } 93 return EverChanged; 94} 95 96/// FactorCommonPHIPreds - If there are multiple preds with the same incoming 97/// value for the PHI, factor them together so we get one block to thread for 98/// the whole group. 99/// This is important for things like "phi i1 [true, true, false, true, x]" 100/// where we only need to clone the block for the true blocks once. 101/// 102BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) { 103 SmallVector<BasicBlock*, 16> CommonPreds; 104 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 105 if (PN->getIncomingValue(i) == CstVal) 106 CommonPreds.push_back(PN->getIncomingBlock(i)); 107 108 if (CommonPreds.size() == 1) 109 return CommonPreds[0]; 110 111 DOUT << " Factoring out " << CommonPreds.size() 112 << " common predecessors.\n"; 113 return SplitBlockPredecessors(PN->getParent(), 114 &CommonPreds[0], CommonPreds.size(), 115 ".thr_comm", this); 116} 117 118 119/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to 120/// thread across it. 121static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { 122 /// Ignore PHI nodes, these will be flattened when duplication happens. 123 BasicBlock::const_iterator I = BB->getFirstNonPHI(); 124 125 // Sum up the cost of each instruction until we get to the terminator. Don't 126 // include the terminator because the copy won't include it. 127 unsigned Size = 0; 128 for (; !isa<TerminatorInst>(I); ++I) { 129 // Debugger intrinsics don't incur code size. 130 if (isa<DbgInfoIntrinsic>(I)) continue; 131 132 // If this is a pointer->pointer bitcast, it is free. 133 if (isa<BitCastInst>(I) && isa<PointerType>(I->getType())) 134 continue; 135 136 // All other instructions count for at least one unit. 137 ++Size; 138 139 // Calls are more expensive. If they are non-intrinsic calls, we model them 140 // as having cost of 4. If they are a non-vector intrinsic, we model them 141 // as having cost of 2 total, and if they are a vector intrinsic, we model 142 // them as having cost 1. 143 if (const CallInst *CI = dyn_cast<CallInst>(I)) { 144 if (!isa<IntrinsicInst>(CI)) 145 Size += 3; 146 else if (isa<VectorType>(CI->getType())) 147 Size += 1; 148 } 149 } 150 151 // Threading through a switch statement is particularly profitable. If this 152 // block ends in a switch, decrease its cost to make it more likely to happen. 153 if (isa<SwitchInst>(I)) 154 Size = Size > 6 ? Size-6 : 0; 155 156 return Size; 157} 158 159/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its 160/// predecessor is known to have one successor (DestBB!). Eliminate the edge 161/// between them, moving the instructions in the predecessor into DestBB and 162/// deleting the predecessor block. 163/// 164/// FIXME: Move to TransformUtils to share with simplifycfg and codegenprepare. 165static void MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) { 166 // If BB has single-entry PHI nodes, fold them. 167 while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { 168 Value *NewVal = PN->getIncomingValue(0); 169 // Replace self referencing PHI with undef, it must be dead. 170 if (NewVal == PN) NewVal = UndefValue::get(PN->getType()); 171 PN->replaceAllUsesWith(NewVal); 172 PN->eraseFromParent(); 173 } 174 175 BasicBlock *PredBB = DestBB->getSinglePredecessor(); 176 assert(PredBB && "Block doesn't have a single predecessor!"); 177 178 // Splice all the instructions from PredBB to DestBB. 179 PredBB->getTerminator()->eraseFromParent(); 180 DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); 181 182 // Anything that branched to PredBB now branches to DestBB. 183 PredBB->replaceAllUsesWith(DestBB); 184 185 // Nuke BB. 186 PredBB->eraseFromParent(); 187} 188 189 190/// ThreadBlock - If there are any predecessors whose control can be threaded 191/// through to a successor, transform them now. 192bool JumpThreading::ThreadBlock(BasicBlock *BB) { 193 // If this block has a single predecessor, and if that pred has a single 194 // successor, merge the blocks. This encourages recursive jump threading 195 // because now the condition in this block can be threaded through 196 // predecessors of our predecessor block. 197 if (BasicBlock *SinglePred = BB->getSinglePredecessor()) 198 if (SinglePred->getTerminator()->getNumSuccessors() == 1) { 199 MergeBasicBlockIntoOnlyPred(BB); 200 return true; 201 } 202 203 // See if this block ends with a branch or switch. If so, see if the 204 // condition is a phi node. If so, and if an entry of the phi node is a 205 // constant, we can thread the block. 206 Value *Condition; 207 if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { 208 // Can't thread an unconditional jump. 209 if (BI->isUnconditional()) return false; 210 Condition = BI->getCondition(); 211 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) 212 Condition = SI->getCondition(); 213 else 214 return false; // Must be an invoke. 215 216 // If the terminator of this block is branching on a constant, simplify the 217 // terminator to an unconditional branch. This can occur due to threading in 218 // other blocks. 219 if (isa<ConstantInt>(Condition)) { 220 DOUT << " In block '" << BB->getNameStart() 221 << "' folding terminator: " << *BB->getTerminator(); 222 ++NumFolds; 223 ConstantFoldTerminator(BB); 224 return true; 225 } 226 227 // If there is only a single predecessor of this block, nothing to fold. 228 if (BB->getSinglePredecessor()) 229 return false; 230 231 // See if this is a phi node in the current block. 232 PHINode *PN = dyn_cast<PHINode>(Condition); 233 if (PN && PN->getParent() == BB) 234 return ProcessJumpOnPHI(PN); 235 236 // If this is a conditional branch whose condition is and/or of a phi, try to 237 // simplify it. 238 if (BinaryOperator *CondI = dyn_cast<BinaryOperator>(Condition)) { 239 if ((CondI->getOpcode() == Instruction::And || 240 CondI->getOpcode() == Instruction::Or) && 241 isa<BranchInst>(BB->getTerminator()) && 242 ProcessBranchOnLogical(CondI, BB, 243 CondI->getOpcode() == Instruction::And)) 244 return true; 245 } 246 247 // If we have "br (phi != 42)" and the phi node has any constant values as 248 // operands, we can thread through this block. 249 if (CmpInst *CondCmp = dyn_cast<CmpInst>(Condition)) 250 if (isa<PHINode>(CondCmp->getOperand(0)) && 251 isa<Constant>(CondCmp->getOperand(1)) && 252 ProcessBranchOnCompare(CondCmp, BB)) 253 return true; 254 255 // Check for some cases that are worth simplifying. Right now we want to look 256 // for loads that are used by a switch or by the condition for the branch. If 257 // we see one, check to see if it's partially redundant. If so, insert a PHI 258 // which can then be used to thread the values. 259 // 260 // This is particularly important because reg2mem inserts loads and stores all 261 // over the place, and this blocks jump threading if we don't zap them. 262 Value *SimplifyValue = Condition; 263 if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue)) 264 if (isa<Constant>(CondCmp->getOperand(1))) 265 SimplifyValue = CondCmp->getOperand(0); 266 267 if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue)) 268 if (SimplifyPartiallyRedundantLoad(LI)) 269 return true; 270 271 // TODO: If we have: "br (X > 0)" and we have a predecessor where we know 272 // "(X == 4)" thread through this block. 273 274 return false; 275} 276 277 278/// FindAvailableLoadedValue - Scan backwards from ScanFrom checking to see if 279/// we have the value at the memory address *Ptr locally available within a 280/// small number of instructions. If the value is available, return it. 281/// 282/// If not, return the iterator for the last validated instruction that the 283/// value would be live through. If we scanned the entire block, ScanFrom would 284/// be left at begin(). 285/// 286/// FIXME: Move this to transform utils and use from 287/// InstCombiner::visitLoadInst. It would also be nice to optionally take AA so 288/// that GVN could do this. 289static Value *FindAvailableLoadedValue(Value *Ptr, 290 BasicBlock *ScanBB, 291 BasicBlock::iterator &ScanFrom) { 292 293 unsigned NumToScan = 6; 294 while (ScanFrom != ScanBB->begin()) { 295 // Don't scan huge blocks. 296 if (--NumToScan == 0) return 0; 297 298 Instruction *Inst = --ScanFrom; 299 300 // If this is a load of Ptr, the loaded value is available. 301 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) 302 if (LI->getOperand(0) == Ptr) 303 return LI; 304 305 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 306 // If this is a store through Ptr, the value is available! 307 if (SI->getOperand(1) == Ptr) 308 return SI->getOperand(0); 309 310 // If Ptr is an alloca and this is a store to a different alloca, ignore 311 // the store. This is a trivial form of alias analysis that is important 312 // for reg2mem'd code. 313 if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) && 314 (isa<AllocaInst>(SI->getOperand(1)) || 315 isa<GlobalVariable>(SI->getOperand(1)))) 316 continue; 317 318 // Otherwise the store that may or may not alias the pointer, bail out. 319 ++ScanFrom; 320 return 0; 321 } 322 323 324 // If this is some other instruction that may clobber Ptr, bail out. 325 if (Inst->mayWriteToMemory()) { 326 // May modify the pointer, bail out. 327 ++ScanFrom; 328 return 0; 329 } 330 } 331 332 // Got to the start of the block, we didn't find it, but are done for this 333 // block. 334 return 0; 335} 336 337 338/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant 339/// load instruction, eliminate it by replacing it with a PHI node. This is an 340/// important optimization that encourages jump threading, and needs to be run 341/// interlaced with other jump threading tasks. 342bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { 343 // Don't hack volatile loads. 344 if (LI->isVolatile()) return false; 345 346 // If the load is defined in a block with exactly one predecessor, it can't be 347 // partially redundant. 348 BasicBlock *LoadBB = LI->getParent(); 349 if (LoadBB->getSinglePredecessor()) 350 return false; 351 352 Value *LoadedPtr = LI->getOperand(0); 353 354 // If the loaded operand is defined in the LoadBB, it can't be available. 355 // FIXME: Could do PHI translation, that would be fun :) 356 if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr)) 357 if (PtrOp->getParent() == LoadBB) 358 return false; 359 360 // Scan a few instructions up from the load, to see if it is obviously live at 361 // the entry to its block. 362 BasicBlock::iterator BBIt = LI; 363 364 if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt)) { 365 // If the value if the load is locally available within the block, just use 366 // it. This frequently occurs for reg2mem'd allocas. 367 //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n"; 368 LI->replaceAllUsesWith(AvailableVal); 369 LI->eraseFromParent(); 370 return true; 371 } 372 373 // Otherwise, if we scanned the whole block and got to the top of the block, 374 // we know the block is locally transparent to the load. If not, something 375 // might clobber its value. 376 if (BBIt != LoadBB->begin()) 377 return false; 378 379 380 SmallPtrSet<BasicBlock*, 8> PredsScanned; 381 typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy; 382 AvailablePredsTy AvailablePreds; 383 BasicBlock *OneUnavailablePred = 0; 384 385 // If we got here, the loaded value is transparent through to the start of the 386 // block. Check to see if it is available in any of the predecessor blocks. 387 for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); 388 PI != PE; ++PI) { 389 BasicBlock *PredBB = *PI; 390 391 // If we already scanned this predecessor, skip it. 392 if (!PredsScanned.insert(PredBB)) 393 continue; 394 395 // Scan the predecessor to see if the value is available in the pred. 396 BBIt = PredBB->end(); 397 Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt); 398 if (!PredAvailable) { 399 OneUnavailablePred = PredBB; 400 continue; 401 } 402 403 // If so, this load is partially redundant. Remember this info so that we 404 // can create a PHI node. 405 AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable)); 406 } 407 408 // If the loaded value isn't available in any predecessor, it isn't partially 409 // redundant. 410 if (AvailablePreds.empty()) return false; 411 412 // Okay, the loaded value is available in at least one (and maybe all!) 413 // predecessors. If the value is unavailable in more than one unique 414 // predecessor, we want to insert a merge block for those common predecessors. 415 // This ensures that we only have to insert one reload, thus not increasing 416 // code size. 417 BasicBlock *UnavailablePred = 0; 418 419 // If there is exactly one predecessor where the value is unavailable, the 420 // already computed 'OneUnavailablePred' block is it. If it ends in an 421 // unconditional branch, we know that it isn't a critical edge. 422 if (PredsScanned.size() == AvailablePreds.size()+1 && 423 OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) { 424 UnavailablePred = OneUnavailablePred; 425 } else if (PredsScanned.size() != AvailablePreds.size()) { 426 // Otherwise, we had multiple unavailable predecessors or we had a critical 427 // edge from the one. 428 SmallVector<BasicBlock*, 8> PredsToSplit; 429 SmallPtrSet<BasicBlock*, 8> AvailablePredSet; 430 431 for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i) 432 AvailablePredSet.insert(AvailablePreds[i].first); 433 434 // Add all the unavailable predecessors to the PredsToSplit list. 435 for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); 436 PI != PE; ++PI) 437 if (!AvailablePredSet.count(*PI)) 438 PredsToSplit.push_back(*PI); 439 440 // Split them out to their own block. 441 UnavailablePred = 442 SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(), 443 "thread-split", this); 444 } 445 446 // If the value isn't available in all predecessors, then there will be 447 // exactly one where it isn't available. Insert a load on that edge and add 448 // it to the AvailablePreds list. 449 if (UnavailablePred) { 450 assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 && 451 "Can't handle critical edge here!"); 452 Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", 453 UnavailablePred->getTerminator()); 454 AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); 455 } 456 457 // Now we know that each predecessor of this block has a value in 458 // AvailablePreds, sort them for efficient access as we're walking the preds. 459 std::sort(AvailablePreds.begin(), AvailablePreds.end()); 460 461 // Create a PHI node at the start of the block for the PRE'd load value. 462 PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin()); 463 PN->takeName(LI); 464 465 // Insert new entries into the PHI for each predecessor. A single block may 466 // have multiple entries here. 467 for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; 468 ++PI) { 469 AvailablePredsTy::iterator I = 470 std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), 471 std::make_pair(*PI, (Value*)0)); 472 473 assert(I != AvailablePreds.end() && I->first == *PI && 474 "Didn't find entry for predecessor!"); 475 476 PN->addIncoming(I->second, I->first); 477 } 478 479 //cerr << "PRE: " << *LI << *PN << "\n"; 480 481 LI->replaceAllUsesWith(PN); 482 LI->eraseFromParent(); 483 484 return true; 485} 486 487 488/// ProcessJumpOnPHI - We have a conditional branch of switch on a PHI node in 489/// the current block. See if there are any simplifications we can do based on 490/// inputs to the phi node. 491/// 492bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { 493 // See if the phi node has any constant values. If so, we can determine where 494 // the corresponding predecessor will branch. 495 ConstantInt *PredCst = 0; 496 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 497 if ((PredCst = dyn_cast<ConstantInt>(PN->getIncomingValue(i)))) 498 break; 499 500 // If no incoming value has a constant, we don't know the destination of any 501 // predecessors. 502 if (PredCst == 0) 503 return false; 504 505 // See if the cost of duplicating this block is low enough. 506 BasicBlock *BB = PN->getParent(); 507 unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); 508 if (JumpThreadCost > Threshold) { 509 DOUT << " Not threading BB '" << BB->getNameStart() 510 << "' - Cost is too high: " << JumpThreadCost << "\n"; 511 return false; 512 } 513 514 // If so, we can actually do this threading. Merge any common predecessors 515 // that will act the same. 516 BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); 517 518 // Next, figure out which successor we are threading to. 519 BasicBlock *SuccBB; 520 if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) 521 SuccBB = BI->getSuccessor(PredCst == ConstantInt::getFalse()); 522 else { 523 SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); 524 SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst)); 525 } 526 527 // If threading to the same block as we come from, we would infinite loop. 528 if (SuccBB == BB) { 529 DOUT << " Not threading BB '" << BB->getNameStart() 530 << "' - would thread to self!\n"; 531 return false; 532 } 533 534 // And finally, do it! 535 DOUT << " Threading edge from '" << PredBB->getNameStart() << "' to '" 536 << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost 537 << ", across block:\n " 538 << *BB << "\n"; 539 540 ThreadEdge(BB, PredBB, SuccBB); 541 ++NumThreads; 542 return true; 543} 544 545/// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch 546/// whose condition is an AND/OR where one side is PN. If PN has constant 547/// operands that permit us to evaluate the condition for some operand, thread 548/// through the block. For example with: 549/// br (and X, phi(Y, Z, false)) 550/// the predecessor corresponding to the 'false' will always jump to the false 551/// destination of the branch. 552/// 553bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, 554 bool isAnd) { 555 // If this is a binary operator tree of the same AND/OR opcode, check the 556 // LHS/RHS. 557 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) 558 if ((isAnd && BO->getOpcode() == Instruction::And) || 559 (!isAnd && BO->getOpcode() == Instruction::Or)) { 560 if (ProcessBranchOnLogical(BO->getOperand(0), BB, isAnd)) 561 return true; 562 if (ProcessBranchOnLogical(BO->getOperand(1), BB, isAnd)) 563 return true; 564 } 565 566 // If this isn't a PHI node, we can't handle it. 567 PHINode *PN = dyn_cast<PHINode>(V); 568 if (!PN || PN->getParent() != BB) return false; 569 570 // We can only do the simplification for phi nodes of 'false' with AND or 571 // 'true' with OR. See if we have any entries in the phi for this. 572 unsigned PredNo = ~0U; 573 ConstantInt *PredCst = ConstantInt::get(Type::Int1Ty, !isAnd); 574 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 575 if (PN->getIncomingValue(i) == PredCst) { 576 PredNo = i; 577 break; 578 } 579 } 580 581 // If no match, bail out. 582 if (PredNo == ~0U) 583 return false; 584 585 // See if the cost of duplicating this block is low enough. 586 unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); 587 if (JumpThreadCost > Threshold) { 588 DOUT << " Not threading BB '" << BB->getNameStart() 589 << "' - Cost is too high: " << JumpThreadCost << "\n"; 590 return false; 591 } 592 593 // If so, we can actually do this threading. Merge any common predecessors 594 // that will act the same. 595 BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); 596 597 // Next, figure out which successor we are threading to. If this was an AND, 598 // the constant must be FALSE, and we must be targeting the 'false' block. 599 // If this is an OR, the constant must be TRUE, and we must be targeting the 600 // 'true' block. 601 BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd); 602 603 // If threading to the same block as we come from, we would infinite loop. 604 if (SuccBB == BB) { 605 DOUT << " Not threading BB '" << BB->getNameStart() 606 << "' - would thread to self!\n"; 607 return false; 608 } 609 610 // And finally, do it! 611 DOUT << " Threading edge through bool from '" << PredBB->getNameStart() 612 << "' to '" << SuccBB->getNameStart() << "' with cost: " 613 << JumpThreadCost << ", across block:\n " 614 << *BB << "\n"; 615 616 ThreadEdge(BB, PredBB, SuccBB); 617 ++NumThreads; 618 return true; 619} 620 621/// ProcessBranchOnCompare - We found a branch on a comparison between a phi 622/// node and a constant. If the PHI node contains any constants as inputs, we 623/// can fold the compare for that edge and thread through it. 624bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { 625 PHINode *PN = cast<PHINode>(Cmp->getOperand(0)); 626 Constant *RHS = cast<Constant>(Cmp->getOperand(1)); 627 628 // If the phi isn't in the current block, an incoming edge to this block 629 // doesn't control the destination. 630 if (PN->getParent() != BB) 631 return false; 632 633 // We can do this simplification if any comparisons fold to true or false. 634 // See if any do. 635 Constant *PredCst = 0; 636 bool TrueDirection = false; 637 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 638 PredCst = dyn_cast<Constant>(PN->getIncomingValue(i)); 639 if (PredCst == 0) continue; 640 641 Constant *Res; 642 if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cmp)) 643 Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS); 644 else 645 Res = ConstantExpr::getFCmp(cast<FCmpInst>(Cmp)->getPredicate(), 646 PredCst, RHS); 647 // If this folded to a constant expr, we can't do anything. 648 if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) { 649 TrueDirection = ResC->getZExtValue(); 650 break; 651 } 652 // If this folded to undef, just go the false way. 653 if (isa<UndefValue>(Res)) { 654 TrueDirection = false; 655 break; 656 } 657 658 // Otherwise, we can't fold this input. 659 PredCst = 0; 660 } 661 662 // If no match, bail out. 663 if (PredCst == 0) 664 return false; 665 666 // See if the cost of duplicating this block is low enough. 667 unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); 668 if (JumpThreadCost > Threshold) { 669 DOUT << " Not threading BB '" << BB->getNameStart() 670 << "' - Cost is too high: " << JumpThreadCost << "\n"; 671 return false; 672 } 673 674 // If so, we can actually do this threading. Merge any common predecessors 675 // that will act the same. 676 BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); 677 678 // Next, get our successor. 679 BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection); 680 681 // If threading to the same block as we come from, we would infinite loop. 682 if (SuccBB == BB) { 683 DOUT << " Not threading BB '" << BB->getNameStart() 684 << "' - would thread to self!\n"; 685 return false; 686 } 687 688 689 // And finally, do it! 690 DOUT << " Threading edge through bool from '" << PredBB->getNameStart() 691 << "' to '" << SuccBB->getNameStart() << "' with cost: " 692 << JumpThreadCost << ", across block:\n " 693 << *BB << "\n"; 694 695 ThreadEdge(BB, PredBB, SuccBB); 696 ++NumThreads; 697 return true; 698} 699 700 701/// ThreadEdge - We have decided that it is safe and profitable to thread an 702/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this 703/// change. 704void JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, 705 BasicBlock *SuccBB) { 706 707 // Jump Threading can not update SSA properties correctly if the values 708 // defined in the duplicated block are used outside of the block itself. For 709 // this reason, we spill all values that are used outside of BB to the stack. 710 for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { 711 if (!I->isUsedOutsideOfBlock(BB)) 712 continue; 713 714 // We found a use of I outside of BB. Create a new stack slot to 715 // break this inter-block usage pattern. 716 DemoteRegToStack(*I); 717 } 718 719 // We are going to have to map operands from the original BB block to the new 720 // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to 721 // account for entry from PredBB. 722 DenseMap<Instruction*, Value*> ValueMapping; 723 724 BasicBlock *NewBB = 725 BasicBlock::Create(BB->getName()+".thread", BB->getParent(), BB); 726 NewBB->moveAfter(PredBB); 727 728 BasicBlock::iterator BI = BB->begin(); 729 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) 730 ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); 731 732 // Clone the non-phi instructions of BB into NewBB, keeping track of the 733 // mapping and using it to remap operands in the cloned instructions. 734 for (; !isa<TerminatorInst>(BI); ++BI) { 735 Instruction *New = BI->clone(); 736 New->setName(BI->getNameStart()); 737 NewBB->getInstList().push_back(New); 738 ValueMapping[BI] = New; 739 740 // Remap operands to patch up intra-block references. 741 for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) 742 if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) 743 if (Value *Remapped = ValueMapping[Inst]) 744 New->setOperand(i, Remapped); 745 } 746 747 // We didn't copy the terminator from BB over to NewBB, because there is now 748 // an unconditional jump to SuccBB. Insert the unconditional jump. 749 BranchInst::Create(SuccBB, NewBB); 750 751 // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the 752 // PHI nodes for NewBB now. 753 for (BasicBlock::iterator PNI = SuccBB->begin(); isa<PHINode>(PNI); ++PNI) { 754 PHINode *PN = cast<PHINode>(PNI); 755 // Ok, we have a PHI node. Figure out what the incoming value was for the 756 // DestBlock. 757 Value *IV = PN->getIncomingValueForBlock(BB); 758 759 // Remap the value if necessary. 760 if (Instruction *Inst = dyn_cast<Instruction>(IV)) 761 if (Value *MappedIV = ValueMapping[Inst]) 762 IV = MappedIV; 763 PN->addIncoming(IV, NewBB); 764 } 765 766 // Finally, NewBB is good to go. Update the terminator of PredBB to jump to 767 // NewBB instead of BB. This eliminates predecessors from BB, which requires 768 // us to simplify any PHI nodes in BB. 769 TerminatorInst *PredTerm = PredBB->getTerminator(); 770 for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) 771 if (PredTerm->getSuccessor(i) == BB) { 772 BB->removePredecessor(PredBB); 773 PredTerm->setSuccessor(i, NewBB); 774 } 775} 776