CodeGenPrepare.cpp revision 677c6ecd0804c247eb727a830b50cd6537a6c12c
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool>
CriticalEdgeSplit("cgp-critical-edge-splitting",
                  cl::desc("Split critical edges during codegen prepare"),
                  cl::init(true), cl::Hidden);
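// (An illustrative usage note: the flag above is registered with the command
// line library, so a tool that runs this pass can flip it, e.g. something like
//   opt -codegenprepare -cgp-critical-edge-splitting=false in.bc -o out.bc
// would exercise the pass with edge splitting disabled. The exact invocation
// is an assumption for illustration, not taken from this file.)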

namespace {
  class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
    ProfileInfo *PFI;

    /// BackEdges - Keep a set of all the loop back edges.
    ///
    SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(ID), TLI(tli) {}
    bool runOnFunction(Function &F);

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addPreserved<ProfileInfo>();
    }

    virtual void releaseMemory() {
      BackEdges.clear();
    }

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
                            DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                               DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeCallInst(CallInst *CI);
    bool MoveExtToFormExtLoad(Instruction *I);
    bool OptimizeExtUses(Instruction *I);
    void findLoopBackEdges(const Function &F);
  };
}

char CodeGenPrepare::ID = 0;
INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
                "Optimize for code generation", false, false);

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}

/// findLoopBackEdges - Do a DFS walk to find loop back edges.
///
void CodeGenPrepare::findLoopBackEdges(const Function &F) {
  SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
  FindFunctionBackedges(F, Edges);

  BackEdges.insert(Edges.begin(), Edges.end());
}


bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  PFI = getAnalysisIfAvailable<ProfileInfo>();
  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  // Now find loop back edges.
  findLoopBackEdges(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= OptimizeBlock(*BB);
    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

/// EliminateMostlyEmptyBlocks - Eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
/// isel. Start by eliminating these blocks so we can split them the way we
/// want them.
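///
/// For illustration, a hypothetical IR sketch (block and value names invented)
/// of the kind of block this removes:
///
///   bb:                                 ; preds = %a, %b
///     %p = phi i32 [ %x, %a ], [ %y, %b ]
///     br label %dest
///   dest:
///     %q = phi i32 [ %p, %bb ], [ %z, %c ]
///
/// After elimination, the phi in %dest receives %x and %y directly from the
/// edges out of %a and %b.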
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch (skipping debug info) isn't a phi
    // node, then other stuff is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      while (isa<DbgInfoIntrinsic>(BBI)) {
        if (BBI == BB->begin())
          break;
        --BBI;
      }
      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
        continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor. If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is a PHINode in DestBB, check its incoming values. If an
      // incoming value is defined in BB but does not arrive via BB, this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block. If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}
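
// A hypothetical conflict that CanMergeBlocks rejects (IR sketch, names
// invented): %p is a common predecessor of both %bb and %dest:
//
//   p:
//     br i1 %c, label %bb, label %dest
//   bb:
//     br label %dest
//   dest:
//     %v = phi i32 [ 1, %p ], [ 2, %bb ]
//
// Merging %bb into %dest would require the %p entry of %v to be 1 and 2 at
// the same time, so the merge is refused.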

/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes
/// and an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function. If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  if (PFI) {
    PFI->replaceAllUses(BB, DestBB);
    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
  }
  BB->eraseFromParent();

  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}

/// FindReusablePredBB - Check all of the predecessors of the block DestPHI
/// lives in to see if there is a block that we can reuse as a critical edge
/// from TIBB.
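///
/// A hypothetical sketch: if TIBB conditionally branches to %dest, and some
/// other predecessor %empty consists of nothing but "br label %dest" while
/// supplying exactly the PHI values in %dest that TIBB would, then %empty can
/// stand in for the split edge and TIBB is simply retargeted to it.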
static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) {
  BasicBlock *Dest = DestPHI->getParent();

  /// TIPHIValues - This array is lazily computed to determine the values of
  /// PHIs in Dest that TI would provide.
  SmallVector<Value*, 32> TIPHIValues;

  /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB.
  unsigned TIBBEntryNo = 0;

  // Check to see if Dest has any blocks that can be used as a split edge for
  // this terminator.
  for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) {
    BasicBlock *Pred = DestPHI->getIncomingBlock(pi);
    // To be usable, the pred has to end with an uncond branch to the dest.
    BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
    if (!PredBr || !PredBr->isUnconditional())
      continue;
    // Must be empty other than the branch and debug info.
    BasicBlock::iterator I = Pred->begin();
    while (isa<DbgInfoIntrinsic>(I))
      I++;
    if (&*I != PredBr)
      continue;
    // Cannot be the entry block; its label does not get emitted.
    if (Pred == &Dest->getParent()->getEntryBlock())
      continue;

    // Finally, since we know that Dest has phi nodes in it, we have to make
    // sure that jumping to Pred will have the same effect as going to Dest in
    // terms of PHI values.
    PHINode *PN;
    unsigned PHINo = 0;
    unsigned PredEntryNo = pi;

    bool FoundMatch = true;
    for (BasicBlock::iterator I = Dest->begin();
         (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
      if (PHINo == TIPHIValues.size()) {
        if (PN->getIncomingBlock(TIBBEntryNo) != TIBB)
          TIBBEntryNo = PN->getBasicBlockIndex(TIBB);
        TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo));
      }

      // If the PHI entry doesn't work, we can't use this pred.
      if (PN->getIncomingBlock(PredEntryNo) != Pred)
        PredEntryNo = PN->getBasicBlockIndex(Pred);

      if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) {
        FoundMatch = false;
        break;
      }
    }

    // If we found a workable predecessor, change TI to branch to Succ.
    if (FoundMatch)
      return Pred;
  }
  return 0;
}


/// SplitEdgeNicely - Split the critical edge from TI to its specified
/// successor if it will improve codegen. We only do this if the successor has
/// phi nodes (otherwise critical edges are ok). If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
/// instead of introducing a new block.
static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
                            SmallSet<std::pair<const BasicBlock*,
                                     const BasicBlock*>, 8> &BackEdges,
                            Pass *P) {
  BasicBlock *TIBB = TI->getParent();
  BasicBlock *Dest = TI->getSuccessor(SuccNum);
  assert(isa<PHINode>(Dest->begin()) &&
         "This should only be called if Dest has a PHI!");
  PHINode *DestPHI = cast<PHINode>(Dest->begin());

  // Do not split edges to EH landing pads.
  if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI))
    if (Invoke->getSuccessor(1) == Dest)
      return;

  // As a hack, never split backedges of loops. Even though the copy for any
  // PHIs inserted on the backedge would be dead for exits from the loop, we
  // assume that the cost of *splitting* the backedge would be too high.
  if (BackEdges.count(std::make_pair(TIBB, Dest)))
    return;

  if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) {
    ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
    if (PFI)
      PFI->splitEdge(TIBB, Dest, ReuseBB);
    Dest->removePredecessor(TIBB);
    TI->setSuccessor(SuccNum, ReuseBB);
    return;
  }

  SplitCriticalEdge(TI, SuccNum, P, true);
}


/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
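/// For illustration (hypothetical IR, assuming a target where the i32->i16
/// truncate is a noop after type promotion):
///
///   entry:
///     %t = trunc i32 %x to i16
///     br label %use
///   use:
///     call void @f(i16 %t)
///
/// A copy of the trunc is recreated in %use, so %t no longer has to live
/// across the block boundary in its own virtual register.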
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(CI->getType());

  // Is this an fp<->int conversion?
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to. This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in. For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      UserBB = PN->getIncomingBlock(UI);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - Sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
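///
/// A hypothetical sketch: if "%c = icmp eq i32 %a, %b" is defined in one block
/// but only branched on in two other blocks, a copy of the icmp is recreated
/// in each user block, so the flag-producing instruction sits next to its use
/// and %c does not need a cross-block virtual register.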
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmp - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCmp =
        CmpInst::Create(CI->getOpcode(),
                        CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
  void replaceCall(Value *With) {
    CI->replaceAllUsesWith(With);
    CI->eraseFromParent();
  }
  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
    if (ConstantInt *SizeCI =
          dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
      return SizeCI->isAllOnesValue();
    return false;
  }
};
} // end anonymous namespace

bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  // Lower all uses of llvm.objectsize.*
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
    const Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
    CI->replaceAllUsesWith(RetVal);
    CI->eraseFromParent();
    return true;
  }

  // From here on out we're working with named functions.
  if (CI->getCalledFunction() == 0) return false;

  // We'll need TargetData from here on out.
  const TargetData *TD = TLI ? TLI->getTargetData() : 0;
  if (!TD) return false;

  // Lower all default uses of _chk calls. This is very similar
  // to what InstCombineCalls does, but here we are only lowering calls
  // that have the default "don't know" as the objectsize. Anything else
  // should be left alone.
  CodeGenPrepareFortifiedLibCalls Simplifier;
  return Simplifier.fold(CI, TD);
}

//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation. As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program. The problem is that isel can only
/// see within a single block. As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
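///
/// For illustration (hypothetical IR): if another block computes
///   %a = getelementptr i8* %base, i64 40
/// and this block does "load i8* %a", the matched address computation is
/// rematerialized here as ptrtoint/add/inttoptr "sunkaddr" instructions, so
/// isel can fold the base+offset arithmetic into the load's addressing mode.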
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        const Type *AccessTy,
                                        DenseMap<Value*,Value*> &SunkAddrs) {
  // Figure out what addressing mode will be built up for this operation.
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
                                                      AddrModeInsts, *TLI);

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user. Since our caller is
  // scanning from the top of the BB to the bottom, reuse of the expr is
  // guaranteed to happen later.
  BasicBlock::iterator InsertPt = MemoryInst;

  // Now that we've determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block. If so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
  } else {
    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    const Type *IntPtrTy =
          TLI->getTargetData()->getIntPtrType(AccessTy->getContext());

    Value *Result = 0;

    // Start with the base register. Do this first so that subsequent address
    // matching finds it last, which will prevent it from trying to match it
    // as the scaled value in case it happens to be a mul. That would be
    // problematic if we've sunk a different mul for the scale, because then
    // we'd end up sinking both muls.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType()->isPointerTy())
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      if (V->getType() != IntPtrTy)
        V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
                                        "sunkaddr", InsertPt);
      Result = V;
    }

    // Add the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (V->getType()->isPointerTy()) {
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else {
        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
      }
      if (AddrMode.Scale != 1)
        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
                                                          AddrMode.Scale),
                                      "sunkaddr", InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }
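
    // Note (summary of the surrounding steps): the pieces above and below
    // rebuild the matched mode as
    //   sunkaddr = BaseReg + Scale*ScaledReg + BaseGV + BaseOffs
    // in IntPtrTy arithmetic; at this point Result holds the BaseReg and
    // scaled-register terms, when present.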

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
                                  InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
  }

  MemoryInst->replaceUsesOfWith(Addr, SunkAddr);

  if (Addr->use_empty()) {
    RecursivelyDeleteTriviallyDeadInstructions(Addr);
    // This address is now available for reassignment, so erase the table
    // entry; we don't want to match some completely different instruction.
    SunkAddrs[Addr] = 0;
  }
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computations into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  bool MadeChange = false;

  std::vector<TargetLowering::AsmOperandInfo> TargetConstraints =
    TLI->ParseConstraints(CS);
  unsigned ArgNo = 0;
  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue());

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = const_cast<Value *>(CS.getArgument(ArgNo++));
      MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
    } else if (OpInfo.Type == InlineAsm::isInput)
      ArgNo++;
  }

  return MadeChange;
}

/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
///
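/// A hypothetical sketch: given "%x = load i16* %p" in one block and
/// "%e = zext i16 %x to i32" in another, the zext is moved to just after the
/// load, letting isel select a single extending load (ZEXTLOAD) where the
/// target supports it.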
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
  // Look for a load being extended.
  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI) return false;

  // If they're already in the same block, there's nothing to do.
  if (LI->getParent() == I->getParent())
    return false;

  // If the load has other users and the truncate is not free, this probably
  // isn't worthwhile.
  if (!LI->hasOneUse() &&
      TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
    return false;

  // Check whether the target supports casts folded into loads.
  unsigned LType;
  if (isa<ZExtInst>(I))
    LType = ISD::ZEXTLOAD;
  else {
    assert(isa<SExtInst>(I) && "Unexpected ext type!");
    LType = ISD::SEXTLOAD;
  }
  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
    return false;

  // Move the extend into the same block as the load, so that SelectionDAG
  // can fold it.
  I->removeFromParent();
  I->insertAfter(LI);
  return true;
}
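
/// OptimizeExtUses - If both the result of a {s|z}ext and its source are live
/// out of the defining block, rewrite other out-of-block uses of the source in
/// terms of the extended value, so only one of the two values stays live
/// across the boundary. A hypothetical sketch: with "%s = add i32 ..." and
/// "%e = zext i32 %s to i64" both used in a later block, uses of %s there are
/// replaced by a "trunc i64 %e to i32" inserted in that block (profitable when
/// truncation is free).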
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If both the result of the {s|z}xt and its source are live out, rewrite all
  // other uses of the source with result of extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;

    MadeChange = true;
  }

  return MadeChange;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Split all critical edges where the dest block has a PHI.
  if (CriticalEdgeSplit) {
    TerminatorInst *BBTI = BB.getTerminator();
    if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
      for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
        BasicBlock *SuccBB = BBTI->getSuccessor(i);
        if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
          SplitEdgeNicely(BBTI, i, BackEdges, this);
      }
    }
  }

  // Keep track of non-local addresses that have been sunk into this block.
  // This allows us to avoid inserting duplicate code for blocks with multiple
  // load/stores of the same address.
  DenseMap<Value*, Value*> SunkAddrs;

  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
    Instruction *I = BBI++;

    if (CastInst *CI = dyn_cast<CastInst>(I)) {
      // If the source of the cast is a constant, then this should have
      // already been constant folded. The only reason NOT to constant fold
      // it is if something (e.g. LSR) was careful to place the constant
      // evaluation in a block other than the one that uses it (e.g. to hoist
      // the address of globals out of a loop). If this is the case, we don't
      // want to forward-subst the cast.
      if (isa<Constant>(CI->getOperand(0)))
        continue;

      bool Change = false;
      if (TLI) {
        Change = OptimizeNoopCopyExpression(CI, *TLI);
        MadeChange |= Change;
      }

      if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
        MadeChange |= MoveExtToFormExtLoad(I);
        MadeChange |= OptimizeExtUses(I);
      }
    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
      MadeChange |= OptimizeCmpExpression(CI);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
                                         SunkAddrs);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
                                         SI->getOperand(0)->getType(),
                                         SunkAddrs);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      if (GEPI->hasAllZeroIndices()) {
        /// The GEP operand must be a pointer, so must its result -> BitCast
        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                          GEPI->getName(), GEPI);
        GEPI->replaceAllUsesWith(NC);
        GEPI->eraseFromParent();
        MadeChange = true;
        BBI = NC;
      }
    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
      // If we found an inline asm expression, and if the target knows how to
      // lower it to normal LLVM code, do so now.
      if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
        if (TLI->ExpandInlineAsm(CI)) {
          BBI = BB.begin();
          // Avoid processing instructions out of order, which could cause
          // reuse before a value is defined.
          SunkAddrs.clear();
        } else
          // Sink address computing for memory operands into the block.
          MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
      } else {
        // Other CallInst optimizations that don't need to muck with the
        // enclosing iterator here.
        MadeChange |= OptimizeCallInst(CI);
      }
    }
  }

  return MadeChange;
}