CodeGenPrepare.cpp revision 661a390b830fcdea13ed9daeaf4e6dbd1adcdca6
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/ValueHandle.h"
using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");

static cl::opt<bool> DisableBranchOpts(
  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
  cl::desc("Disable branch optimizations in CodeGenPrepare"));
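// Illustrative usage sketch (an assumption for exposition, not part of this
// file): a target machine is expected to schedule this pass shortly before
// instruction selection, along the lines of:
//
//   PM.add(createCodeGenPreparePass(getTargetLowering()));
//
// where getTargetLowering() stands for the target machine's hook returning
// its TargetLowering instance.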
namespace {
  class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
    DominatorTree *DT;
    ProfileInfo *PFI;

    /// CurInstIterator - As we scan instructions optimizing them, this is the
    /// next instruction to optimize. Xforms that can invalidate this should
    /// update it.
    BasicBlock::iterator CurInstIterator;

    /// Keeps track of non-local addresses that have been sunk into a block.
    /// This allows us to avoid inserting duplicate code for blocks with
    /// multiple load/stores of the same address.
    DenseMap<Value*, Value*> SunkAddrs;

    /// UpdateDT - If the CFG is modified in any way, the dominator tree may
    /// need to be updated.
    bool UpdateDT;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(ID), TLI(tli) {
      initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
    }
    bool runOnFunction(Function &F);

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addPreserved<DominatorTree>();
      AU.addPreserved<ProfileInfo>();
    }

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeInst(Instruction *I);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
    bool OptimizeInlineAsmInst(CallInst *CS);
    bool OptimizeCallInst(CallInst *CI);
    bool MoveExtToFormExtLoad(Instruction *I);
    bool OptimizeExtUses(Instruction *I);
    bool DupRetToEnableTailCallOpts(ReturnInst *RI);
  };
}

char CodeGenPrepare::ID = 0;
INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
                "Optimize for code generation", false, false)

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}

bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  UpdateDT = false;
  DT = getAnalysisIfAvailable<DominatorTree>();
  PFI = getAnalysisIfAvailable<ProfileInfo>();

  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
      BasicBlock *BB = I++;
      MadeChange |= OptimizeBlock(*BB);
    }
    EverMadeChange |= MadeChange;
  }

  SunkAddrs.clear();

  if (!DisableBranchOpts) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= ConstantFoldTerminator(BB);

    if (MadeChange)
      UpdateDT = true;
    EverMadeChange |= MadeChange;
  }

  if (UpdateDT && DT)
    DT->DT->recalculate(F);

  return EverMadeChange;
}

/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
/// isel. Start by eliminating these blocks so we can split them the way we
/// want them.
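///
/// Illustrative example of such a block (a sketch for exposition, not taken
/// from real output):
///
///   split:                          ; preds = %entry
///     %p = phi i32 [ %a, %entry ]
///     br label %merge
///
/// The block is folded away and %p's incoming value is forwarded into the
/// PHI nodes of %merge.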
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch (skipping debug info) isn't a phi
    // node, then other stuff is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      while (isa<DbgInfoIntrinsic>(BBI)) {
        if (BBI == BB->begin())
          break;
        --BBI;
      }
      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
        continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor. If there is a more complex condition (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block. If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phis and
/// an unconditional branch in it.
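///
/// Sketch of the interesting multi-predecessor case (illustrative only): if
/// %bb has predecessors %p1 and %p2 and contains
///
///   %v = phi i32 [ %x, %p1 ], [ %y, %p2 ]
///   br label %dest
///
/// then a PHI in %dest with the incoming pair [ %v, %bb ] is rewritten to
/// take [ %x, %p1 ] and [ %y, %p2 ] directly.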
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function. If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  if (DT) {
    BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
    DT->changeImmediateDominator(DestBB, NewIDom);
    DT->eraseNode(BB);
  }
  if (PFI) {
    PFI->replaceAllUses(BB, DestBB);
    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
  }
  BB->eraseFromParent();
  ++NumBlocksElim;

  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}

/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
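/// A minimal sketch of the transformation (illustrative, assuming the target
/// treats the i64 -> i32 truncate as a noop copy):
///
///   entry:                                entry:
///     %c = trunc i64 %x to i32              br i1 %p, label %a, label %b
///     br i1 %p, label %a, label %b   =>   a:
///   a:                                      %c1 = trunc i64 %x to i32
///     %u = add i32 %c, 1                    %u = add i32 %c1, 1
///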
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(CI->getType());

  // Is this an fp<->int conversion?
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to. This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in. For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      UserBB = PN->getIncomingBlock(UI);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
    ++NumCastUses;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
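///
/// Illustrative sketch:
///
///   entry:                              entry:
///     %c = icmp eq i32 %a, %b             ...
///     ...                          =>  use:
///   use:                                  %c1 = icmp eq i32 %a, %b
///     br i1 %c, ...                       br i1 %c1, ...
///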
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmps - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCmp =
        CmpInst::Create(CI->getOpcode(),
                        CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
    ++NumCmpUses;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
  void replaceCall(Value *With) {
    CI->replaceAllUsesWith(With);
    CI->eraseFromParent();
  }
  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
    if (ConstantInt *SizeCI =
        dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
      return SizeCI->isAllOnesValue();
    return false;
  }
};
} // end anonymous namespace

bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  BasicBlock *BB = CI->getParent();

  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
  // lower it to normal LLVM code, do so now.
  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
    if (TLI->ExpandInlineAsm(CI)) {
      // Avoid invalidating the iterator.
      CurInstIterator = BB->begin();
      // Avoid processing instructions out of order, which could cause
      // reuse before a value is defined.
      SunkAddrs.clear();
      return true;
    }
    // Sink address computing for memory operands into the block.
    if (OptimizeInlineAsmInst(CI))
      return true;
  }

  // Lower all uses of llvm.objectsize.*
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
    const Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);

    // Substituting this can cause recursive simplifications, which can
    // invalidate our iterator. Use a WeakVH to hold onto it in case this
    // happens.
    WeakVH IterHandle(CurInstIterator);

    ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT);

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurInstIterator) {
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
    return true;
  }
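  // To make the lowering above concrete (illustrative sketch): with the
  // "min" argument false, a call such as
  //   %sz = call i32 @llvm.objectsize.i32(i8* %ptr, i1 false)
  // is replaced by the constant i32 -1, i.e. "size unknown, assume the
  // maximum".
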
  // From here on out we're working with named functions.
  if (CI->getCalledFunction() == 0) return false;

  // We'll need TargetData from here on out.
  const TargetData *TD = TLI ? TLI->getTargetData() : 0;
  if (!TD) return false;

  // Lower all default uses of _chk calls. This is very similar
  // to what InstCombineCalls does, but here we are only lowering calls
  // that have the default "don't know" as the objectsize. Anything else
  // should be left alone.
  CodeGenPrepareFortifiedLibCalls Simplifier;
  return Simplifier.fold(CI, TD);
}

/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
/// instructions to the predecessor to enable tail call optimizations. The
/// case it is currently looking for is:
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   br label %return
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   br label %return
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   br label %return
/// return:
///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///   ret i32 %retval
///
/// =>
///
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   ret i32 %tmp0
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   ret i32 %tmp1
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   ret i32 %tmp2
///
bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
  if (!TLI)
    return false;

  Value *V = RI->getReturnValue();
  if (!V)
    return false;

  if (PHINode *PN = dyn_cast<PHINode>(V)) {
    BasicBlock *BB = RI->getParent();
    if (PN->getParent() != BB)
      return false;

    // It's not safe to eliminate the sign / zero extension of the return
    // value. See llvm::isInTailCallPosition().
    const Function *F = BB->getParent();
    unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
    if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
      return false;

    // Make sure there are no instructions between the PHI and the return.
    BasicBlock::iterator BI = PN;
    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
    if (&*BI != RI)
      return false;

    /// Only dup the ReturnInst if the CallInst is likely to be emitted as a
    /// tail call.
    SmallVector<CallInst*, 4> TailCalls;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
      // Make sure the phi value is indeed produced by the tail call.
      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
          TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }

    bool Changed = false;
    for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
      CallInst *CI = TailCalls[i];
      CallSite CS(CI);

      // Conservatively require the attributes of the call to match those of
      // the return. Ignore noalias because it doesn't affect the call
      // sequence.
      unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
      if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
        continue;

      // Make sure the call instruction is followed by an unconditional branch
      // to the return block.
      BasicBlock *CallBB = CI->getParent();
      BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
      if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
        continue;

      // Duplicate the return into CallBB.
      (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
      UpdateDT = Changed = true;
      ++NumRetsDup;
    }

    // If we eliminated all predecessors of the block, delete the block now.
    if (Changed && pred_begin(BB) == pred_end(BB))
      BB->eraseFromParent();

    return Changed;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation. As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program. The problem is that isel can only
/// see within a single block. As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
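///
/// A sketch of the effect (illustrative only): an address computed in
/// another block,
///
///   %addr = getelementptr i32* %base, i64 %idx    ; in %pred
///   ...
///   %v = load i32* %addr                          ; in %use
///
/// is re-expanded next to the load as the integer arithmetic the matcher
/// proved legal, roughly inttoptr(ptrtoint(%base) + %idx * 4), so isel can
/// fold it into the load's addressing mode.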
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        const Type *AccessTy) {
  Value *Repl = Addr;

  // Try to collapse single-value PHI nodes. This is necessary to undo
  // unprofitable PRE transformations.
  SmallVector<Value*, 8> worklist;
  SmallPtrSet<Value*, 16> Visited;
  worklist.push_back(Addr);

  // Use a worklist to iteratively look through PHI nodes, and ensure that
  // the addressing modes obtained from the non-PHI roots of the graph
  // are all equivalent.
  Value *Consensus = 0;
  unsigned NumUsesConsensus = 0;
  bool IsNumUsesConsensusValid = false;
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode;
  while (!worklist.empty()) {
    Value *V = worklist.back();
    worklist.pop_back();

    // Break use-def graph loops.
    if (Visited.count(V)) {
      Consensus = 0;
      break;
    }

    Visited.insert(V);

    // For a PHI node, push all of its incoming values.
    if (PHINode *P = dyn_cast<PHINode>(V)) {
      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
        worklist.push_back(P->getIncomingValue(i));
      continue;
    }

    // For non-PHIs, determine the addressing mode being computed.
    SmallVector<Instruction*, 16> NewAddrModeInsts;
    ExtAddrMode NewAddrMode =
      AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
                                   NewAddrModeInsts, *TLI);

    // This check is broken into two cases with very similar code to avoid
    // using getNumUses() as much as possible. Some values have a lot of uses,
    // so calling getNumUses() unconditionally caused a significant
    // compile-time regression.
    if (!Consensus) {
      Consensus = V;
      AddrMode = NewAddrMode;
      AddrModeInsts = NewAddrModeInsts;
      continue;
    } else if (NewAddrMode == AddrMode) {
      if (!IsNumUsesConsensusValid) {
        NumUsesConsensus = Consensus->getNumUses();
        IsNumUsesConsensusValid = true;
      }

      // Ensure that the obtained addressing mode is equivalent to that
      // obtained for all other roots of the PHI traversal. Also, when
      // choosing one such root as representative, select the one with the
      // most uses in order to keep the cost modeling heuristics in
      // AddressingModeMatcher applicable.
      unsigned NumUses = V->getNumUses();
      if (NumUses > NumUsesConsensus) {
        Consensus = V;
        NumUsesConsensus = NumUses;
        AddrModeInsts = NewAddrModeInsts;
      }
      continue;
    }

    Consensus = 0;
    break;
  }

  // If the addressing mode couldn't be determined, or if multiple different
  // ones were determined, bail out now.
  if (!Consensus) return false;

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user. Since our caller is
  // scanning from the top of the BB to the bottom, reuse of the expr is
  // guaranteed to happen later.
  BasicBlock::iterator InsertPt = MemoryInst;

  // Now that we've determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block. If so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
  } else {
    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    const Type *IntPtrTy =
      TLI->getTargetData()->getIntPtrType(AccessTy->getContext());

    Value *Result = 0;

    // Start with the base register. Do this first so that subsequent address
    // matching finds it last, which will prevent it from trying to match it
    // as the scaled value in case it happens to be a mul. That would be
    // problematic if we've sunk a different mul for the scale, because then
    // we'd end up sinking both muls.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType()->isPointerTy())
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      if (V->getType() != IntPtrTy)
        V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
                                        "sunkaddr", InsertPt);
      Result = V;
    }

    // Add the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (V->getType()->isPointerTy()) {
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else {
        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
      }
      if (AddrMode.Scale != 1)
        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
                                                          AddrMode.Scale),
                                      "sunkaddr", InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
                                  InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }
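    // At this point Result (if non-null) holds the integer expression
    // BaseReg + ScaledReg*Scale + BaseGV for whichever components are
    // present (descriptive summary added for clarity); the constant
    // displacement and the final inttoptr cast are added below.
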
    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",
                                  InsertPt);
  }

  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);

  if (Repl->use_empty()) {
    RecursivelyDeleteTriviallyDeadInstructions(Repl);
    // This address is now available for reassignment, so erase the table
    // entry; we don't want to match some completely different instruction.
    SunkAddrs[Addr] = 0;
  }
  ++NumMemoryInsts;
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
  bool MadeChange = false;

  TargetLowering::AsmOperandInfoVector
    TargetConstraints = TLI->ParseConstraints(CS);
  unsigned ArgNo = 0;
  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue());

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = CS->getArgOperand(ArgNo++);
      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
    } else if (OpInfo.Type == InlineAsm::isInput)
      ArgNo++;
  }

  return MadeChange;
}

/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
///
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
  // Look for a load being extended.
  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI) return false;

  // If they're already in the same block, there's nothing to do.
  if (LI->getParent() == I->getParent())
    return false;

  // If the load has other users and the truncate is not free, this probably
  // isn't worthwhile.
  if (!LI->hasOneUse() &&
      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
      !TLI->isTruncateFree(I->getType(), LI->getType()))
    return false;

  // Check whether the target supports casts folded into loads.
  unsigned LType;
  if (isa<ZExtInst>(I))
    LType = ISD::ZEXTLOAD;
  else {
    assert(isa<SExtInst>(I) && "Unexpected ext type!");
    LType = ISD::SEXTLOAD;
  }
  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
    return false;

  // Move the extend into the same block as the load, so that SelectionDAG
  // can fold it.
  I->removeFromParent();
  I->insertAfter(LI);
  ++NumExtsMoved;
  return true;
}

bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If the result of a {s|z}ext and its source are both live out, rewrite all
  // other uses of the source with result of extension.
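  // Illustrative example: given
  //   %ext = sext i16 %src to i32   ; %src and %ext both live out
  // cross-block uses of %src are rewritten to "trunc i32 %ext to i16", so
  // only %ext needs to be live out of this block.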
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;
    ++NumExtUses;
    MadeChange = true;
  }

  return MadeChange;
}

bool CodeGenPrepare::OptimizeInst(Instruction *I) {
  if (PHINode *P = dyn_cast<PHINode>(I)) {
    // It is possible for very late stage optimizations (such as SimplifyCFG)
    // to introduce PHI nodes too late to be cleaned up. If we detect such a
    // trivial PHI, go ahead and zap it here.
    if (Value *V = SimplifyInstruction(P)) {
      P->replaceAllUsesWith(V);
      P->eraseFromParent();
      ++NumPHIsElim;
      return true;
    }
    return false;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    // If the source of the cast is a constant, then this should have
    // already been constant folded. The only reason NOT to constant fold
    // it is if something (e.g. LSR) was careful to place the constant
    // evaluation in a block other than the one that uses it (e.g. to hoist
    // the address of globals out of a loop). If this is the case, we don't
    // want to forward-subst the cast.
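    // Illustrative example: LSR may hoist a cast of a global such as
    //   %base = ptrtoint i32* @G to i64
    // into a loop preheader; substituting the cast back into the loop body
    // would undo that placement.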
    if (isa<Constant>(CI->getOperand(0)))
      return false;

    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
      return true;

    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
      bool MadeChange = MoveExtToFormExtLoad(I);
      return MadeChange | OptimizeExtUses(I);
    }
    return false;
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(I))
    return OptimizeCmpExpression(CI);

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
    return false;
  }

  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, SI->getOperand(1),
                                SI->getOperand(0)->getType());
    return false;
  }

  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
    if (GEPI->hasAllZeroIndices()) {
      /// The GEP operand must be a pointer, so must its result -> BitCast
      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                        GEPI->getName(), GEPI);
      GEPI->replaceAllUsesWith(NC);
      GEPI->eraseFromParent();
      ++NumGEPsElim;
      OptimizeInst(NC);
      return true;
    }
    return false;
  }

  if (CallInst *CI = dyn_cast<CallInst>(I))
    return OptimizeCallInst(CI);

  if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
    return DupRetToEnableTailCallOpts(RI);

  return false;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  SunkAddrs.clear();
  bool MadeChange = false;

  CurInstIterator = BB.begin();
  for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
    MadeChange |= OptimizeInst(CurInstIterator++);

  return MadeChange;
}