CodeGenPrepare.cpp revision aa0e52328747d982d6c6e501a205832ad724ff62
//===- CodeGenPrepare.cpp - Prepare a function for code generation -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",
                                       cl::init(false), cl::Hidden);

namespace {
  class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
    ProfileInfo *PFI;

    /// BackEdges - Keep a set of all the loop back edges.
    ///
    SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(&ID), TLI(tli) {}
    bool runOnFunction(Function &F);

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addPreserved<ProfileInfo>();
    }

    virtual void releaseMemory() {
      BackEdges.clear();
    }

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
                            DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                               DenseMap<Value*,Value*> &SunkAddrs);
    bool MoveExtToFormExtLoad(Instruction *I);
    bool OptimizeExtUses(Instruction *I);
    void findLoopBackEdges(const Function &F);
  };
}

char CodeGenPrepare::ID = 0;
static RegisterPass<CodeGenPrepare> X("codegenprepare",
                                      "Optimize for code generation");

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}
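
// For orientation, this pass is normally scheduled by the target-independent
// code generator shortly before instruction selection. A minimal sketch of
// how a driver might schedule it (hypothetical code, not part of this file):
//
//   PassManager PM;
//   PM.add(createCodeGenPreparePass(TM.getTargetLowering()));
//   PM.run(M);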

/// findLoopBackEdges - Do a DFS walk to find loop back edges.
///
void CodeGenPrepare::findLoopBackEdges(const Function &F) {
  SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
  FindFunctionBackedges(F, Edges);

  BackEdges.insert(Edges.begin(), Edges.end());
}


bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  PFI = getAnalysisIfAvailable<ProfileInfo>();
  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  // Now find loop back edges.
  findLoopBackEdges(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= OptimizeBlock(*BB);
    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch.  Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
/// isel.  Start by eliminating these blocks so we can split edges the way we
/// want.
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch (skipping debug info) isn't a phi
    // node, then other stuff is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      while (isa<DbgInfoIntrinsic>(BBI)) {
        if (BBI == BB->begin())
          break;
        --BBI;
      }
      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
        continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}
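
// Illustrative example (hypothetical IR, not from the sources): a block the
// routine above removes.  %mid contains only a PHI and an unconditional
// branch, so it can be folded into %dest:
//
//   mid:                          ; preds = %a, %b
//     %p = phi i32 [ %x, %a ], [ %y, %b ]
//     br label %dest
//   dest:                         ; preds = %mid
//     %q = phi i32 [ %p, %mid ]
//
// After the merge, the destination's PHI collapses to
// %p = phi i32 [ %x, %a ], [ %y, %b ].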

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block.  If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}
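
// Illustrative example (hypothetical IR): a common predecessor that blocks
// merging.  %pred reaches both %bb and %dest, and the two PHIs disagree on
// what %pred contributes, so CanMergeBlocks must return false:
//
//   bb:                           ; preds = %pred
//     %p = phi i32 [ 1, %pred ]
//     br label %dest
//   dest:                         ; preds = %pred, %bb
//     %q = phi i32 [ 2, %pred ], [ %p, %bb ]
//
// Merging would require %q to take both 1 and 2 from %pred.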

/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes
/// and an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function.  If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  if (PFI) {
    PFI->replaceAllUses(BB, DestBB);
    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
  }
  BB->eraseFromParent();

  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
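
// Illustrative example (hypothetical IR) of the PHI update above: if %bb
// feeds %dest through a PHI defined in %bb, the incoming values are forwarded
// edge by edge when %bb goes away:
//
//   before:  %p = phi i32 [ %x, %a ], [ %y, %b ]    ; in %bb
//            %q = phi i32 [ %p, %bb ], [ %z, %c ]   ; in %dest
//   after:   %q = phi i32 [ %z, %c ], [ %x, %a ], [ %y, %b ]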

/// SplitEdgeNicely - Split the critical edge from TI to its specified
/// successor if it will improve codegen.  We only do this if the successor has
/// phi nodes (otherwise critical edges are ok).  If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
/// instead of introducing a new block.
static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
                            SmallSet<std::pair<const BasicBlock*,
                                     const BasicBlock*>, 8> &BackEdges,
                            Pass *P) {
  BasicBlock *TIBB = TI->getParent();
  BasicBlock *Dest = TI->getSuccessor(SuccNum);
  assert(isa<PHINode>(Dest->begin()) &&
         "This should only be called if Dest has a PHI!");

  // Do not split edges to EH landing pads.
  if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) {
    if (Invoke->getSuccessor(1) == Dest)
      return;
  }


  // As a hack, never split backedges of loops.  Even though the copy for any
  // PHIs inserted on the backedge would be dead for exits from the loop, we
  // assume that the cost of *splitting* the backedge would be too high.
  if (BackEdges.count(std::make_pair(TIBB, Dest)))
    return;

  if (!FactorCommonPreds) {
    /// TIPHIValues - This array is lazily computed to determine the values of
    /// PHIs in Dest that TI would provide.
    SmallVector<Value*, 32> TIPHIValues;

    // Check to see if Dest has any blocks that can be used as a split edge for
    // this terminator.
    for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
      BasicBlock *Pred = *PI;
      // To be usable, the pred has to end with an uncond branch to the dest.
      BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
      if (!PredBr || !PredBr->isUnconditional())
        continue;
      // Must be empty other than the branch and debug info.
      BasicBlock::iterator I = Pred->begin();
      while (isa<DbgInfoIntrinsic>(I))
        I++;
      if (dyn_cast<Instruction>(I) != PredBr)
        continue;
      // Cannot be the entry block; its label does not get emitted.
      if (Pred == &(Dest->getParent()->getEntryBlock()))
        continue;

      // Finally, since we know that Dest has phi nodes in it, we have to make
      // sure that jumping to Pred will have the same effect as going to Dest in
      // terms of PHI values.
      PHINode *PN;
      unsigned PHINo = 0;
      bool FoundMatch = true;
      for (BasicBlock::iterator I = Dest->begin();
           (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
        if (PHINo == TIPHIValues.size())
          TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

        // If the PHI entry doesn't work, we can't use this pred.
        if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
          FoundMatch = false;
          break;
        }
      }

      // If we found a workable predecessor, change TI to branch to Succ.
      if (FoundMatch) {
        ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
        if (PFI)
          PFI->splitEdge(TIBB, Dest, Pred);
        Dest->removePredecessor(TIBB);
        TI->setSuccessor(SuccNum, Pred);
        return;
      }
    }

    SplitCriticalEdge(TI, SuccNum, P, true);
    return;
  }

  PHINode *PN;
  SmallVector<Value*, 8> TIPHIValues;
  for (BasicBlock::iterator I = Dest->begin();
       (PN = dyn_cast<PHINode>(I)); ++I)
    TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

  SmallVector<BasicBlock*, 8> IdenticalPreds;
  for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (BackEdges.count(std::make_pair(Pred, Dest)))
      continue;
    if (Pred == TIBB)
      IdenticalPreds.push_back(Pred);
    else {
      bool Identical = true;
      unsigned PHINo = 0;
      for (BasicBlock::iterator I = Dest->begin();
           (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo)
        if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
          Identical = false;
          break;
        }
      if (Identical)
        IdenticalPreds.push_back(Pred);
    }
  }

  assert(!IdenticalPreds.empty());
  SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(),
                         ".critedge", P);
}
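
// Illustrative example (hypothetical CFG): given a conditional branch in
// %tibb to %dest where %dest has PHIs, the %tibb -> %dest edge is critical.
// If some existing predecessor %empty just does "br label %dest" and supplies
// the same PHI values that %tibb would, the routine above retargets %tibb's
// branch to %empty instead of creating a brand-new block with
// SplitCriticalEdge.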

/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(CI->getType());

  // This is an fp<->int conversion?
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to.  This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote)
    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
  if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote)
    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in.  For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      UserBB = PN->getIncomingBlock(UI);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}
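
// Illustrative example (hypothetical IR): a bitcast whose only use lives in
// another block gets re-materialized next to the use, so isel never needs a
// cross-block virtual register for it:
//
//   def:   %c = bitcast i32* %p to i8*         ; defined here...
//          br label %use
//   use:   store i8 0, i8* %c                  ; ...only used here
//
// After sinking, the bitcast is recreated in %use and the original dies.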

/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced.  This
/// is a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmps - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCmp =
        CmpInst::Create(CI->getOpcode(),
                        CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation.  As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program.  The problem is that isel can only
/// see within a single block.  As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        const Type *AccessTy,
                                        DenseMap<Value*,Value*> &SunkAddrs) {
  // Figure out what addressing mode will be built up for this operation.
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
                                                      AddrModeInsts, *TLI);

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user.  Since our caller is
  // scanning from the top of the BB to the bottom, any reuse of the expr is
  // guaranteed to happen later.
  BasicBlock::iterator InsertPt = MemoryInst;

  // Now that we've determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block.  If so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
  } else {
    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    const Type *IntPtrTy =
          TLI->getTargetData()->getIntPtrType(AccessTy->getContext());

    Value *Result = 0;

    // Start with the base register. Do this first so that subsequent address
    // matching finds it last, which will prevent it from trying to match it
    // as the scaled value in case it happens to be a mul. That would be
    // problematic if we've sunk a different mul for the scale, because then
    // we'd end up sinking both muls.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (isa<PointerType>(V->getType()))
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      if (V->getType() != IntPtrTy)
        V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
                                        "sunkaddr", InsertPt);
      Result = V;
    }

    // Add the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (isa<PointerType>(V->getType())) {
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else {
        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
      }
      if (AddrMode.Scale != 1)
        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
                                                          AddrMode.Scale),
                                      "sunkaddr", InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
                                  InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
  }

  MemoryInst->replaceUsesOfWith(Addr, SunkAddr);

  if (Addr->use_empty())
    RecursivelyDeleteTriviallyDeadInstructions(Addr);
  return true;
}
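
// Illustrative example (hypothetical IR): for a matched mode like
// base + 4*idx + 8, the code above emits a flattened integer computation
// next to the memory instruction, e.g. on a 64-bit target:
//
//   %sunkaddr = ptrtoint i8* %base to i64
//   %sunkaddr1 = mul i64 %idx, 4
//   %sunkaddr2 = add i64 %sunkaddr, %sunkaddr1
//   %sunkaddr3 = add i64 %sunkaddr2, 8
//   %sunkaddr4 = inttoptr i64 %sunkaddr3 to i32*
//
// Isel can then fold the whole expression back into one addressing mode.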

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  bool MadeChange = false;
  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());

  // Do a prepass over the constraints, canonicalizing them, and building up
  // the ConstraintOperands list.
  std::vector<InlineAsm::ConstraintInfo>
    ConstraintInfos = IA->ParseConstraints();

  /// ConstraintOperands - Information about all of the constraints.
  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
    ConstraintOperands.
      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      if (OpInfo.isIndirect)
        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue(),
                             OpInfo.ConstraintType == TargetLowering::C_Memory);

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = OpInfo.CallOperandVal;
      MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
    }
  }

  return MadeChange;
}
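
// Illustrative example (hypothetical IR): an asm with an indirect memory
// operand is handled like a load/store of that operand's address:
//
//   call void asm "movl $$0, $0", "=*m"(i32* %addr)
//
// If %addr was computed in another block, the computation is sunk here.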

/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
///
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
  // Look for a load being extended.
  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI) return false;

  // If they're already in the same block, there's nothing to do.
  if (LI->getParent() == I->getParent())
    return false;

  // If the load has other users and the truncate is not free, this probably
  // isn't worthwhile.
  if (!LI->hasOneUse() &&
      TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
    return false;

  // Check whether the target supports casts folded into loads.
  unsigned LType;
  if (isa<ZExtInst>(I))
    LType = ISD::ZEXTLOAD;
  else {
    assert(isa<SExtInst>(I) && "Unexpected ext type!");
    LType = ISD::SEXTLOAD;
  }
  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
    return false;

  // Move the extend into the same block as the load, so that SelectionDAG
  // can fold it.
  I->removeFromParent();
  I->insertAfter(LI);
  return true;
}

bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If both result of the {s|z}xt and its source are live out, rewrite all
  // other uses of the source with result of extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;

    MadeChange = true;
  }

  return MadeChange;
}
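
// Illustrative example (hypothetical IR) for OptimizeExtUses: when both %x
// and %ext escape the defining block, cross-block uses of %x are rewritten as
// free truncates of %ext, so only one value is live out:
//
//   def:    %x = ...
//           %ext = zext i16 %x to i32
//           br label %other
//   other:  use %x   ==>   use (trunc i32 %ext to i16)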

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Split all critical edges where the dest block has a PHI.
  TerminatorInst *BBTI = BB.getTerminator();
  if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
      BasicBlock *SuccBB = BBTI->getSuccessor(i);
      if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
        SplitEdgeNicely(BBTI, i, BackEdges, this);
    }
  }

  // Keep track of non-local addresses that have been sunk into this block.
  // This allows us to avoid inserting duplicate code for blocks with multiple
  // load/stores of the same address.
  DenseMap<Value*, Value*> SunkAddrs;

  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
    Instruction *I = BBI++;

    if (CastInst *CI = dyn_cast<CastInst>(I)) {
      // If the source of the cast is a constant, then this should have
      // already been constant folded.  The only reason NOT to constant fold
      // it is if something (e.g. LSR) was careful to place the constant
      // evaluation in a block other than the one that uses it (e.g. to hoist
      // the address of globals out of a loop).  If this is the case, we don't
      // want to forward-subst the cast.
      if (isa<Constant>(CI->getOperand(0)))
        continue;

      bool Change = false;
      if (TLI) {
        Change = OptimizeNoopCopyExpression(CI, *TLI);
        MadeChange |= Change;
      }

      if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
        MadeChange |= MoveExtToFormExtLoad(I);
        MadeChange |= OptimizeExtUses(I);
      }
    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
      MadeChange |= OptimizeCmpExpression(CI);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
                                         SunkAddrs);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
                                         SI->getOperand(0)->getType(),
                                         SunkAddrs);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      if (GEPI->hasAllZeroIndices()) {
        /// The GEP operand must be a pointer, so must its result -> BitCast
        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                          GEPI->getName(), GEPI);
        GEPI->replaceAllUsesWith(NC);
        GEPI->eraseFromParent();
        MadeChange = true;
        BBI = NC;
      }
    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
      // If we found an inline asm expression, and if the target knows how to
      // lower it to normal LLVM code, do so now.
      if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
        if (TLI->ExpandInlineAsm(CI)) {
          BBI = BB.begin();
          // Avoid processing instructions out of order, which could cause
          // reuse before a value is defined.
          SunkAddrs.clear();
        } else
          // Sink address computing for memory operands into the block.
          MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
      }
    }
  }

  return MadeChange;
}