CodeGenPrepare.cpp revision a4b04210d4a838e5a48b0c128bdfe42c3055633f
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",
                                       cl::init(false), cl::Hidden);

namespace {
  class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
    ProfileInfo *PI;

    /// BackEdges - Keep a set of all the loop back edges.
    ///
    SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(&ID), TLI(tli) {}
    bool runOnFunction(Function &F);

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addPreserved<ProfileInfo>();
    }

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
                            DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                               DenseMap<Value*,Value*> &SunkAddrs);
    bool MoveExtToFormExtLoad(Instruction *I);
    bool OptimizeExtUses(Instruction *I);
    void findLoopBackEdges(const Function &F);
  };
}

char CodeGenPrepare::ID = 0;
static RegisterPass<CodeGenPrepare> X("codegenprepare",
                                      "Optimize for code generation");

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}
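
// Illustrative note (not from the original file): targets typically schedule
// this pass just before instruction selection. A minimal sketch, assuming an
// LLVMTargetMachine-style setup where a PassManagerBase PM and
// getTargetLowering() are available:
//
//   PM.add(createCodeGenPreparePass(getTargetLowering()));
//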
/// findLoopBackEdges - Do a DFS walk to find loop back edges.
///
void CodeGenPrepare::findLoopBackEdges(const Function &F) {
  SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
  FindFunctionBackedges(F, Edges);

  BackEdges.insert(Edges.begin(), Edges.end());
}


bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  PI = getAnalysisIfAvailable<ProfileInfo>();
  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  // Now find loop back edges.
  findLoopBackEdges(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= OptimizeBlock(*BB);
    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

/// EliminateMostlyEmptyBlocks - Eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
/// isel. Start by eliminating these blocks so we can split the edges the way
/// we want them.
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch (skipping debug info) isn't a phi
    // node, then other stuff is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      while (isa<DbgInfoIntrinsic>(BBI)) {
        if (BBI == BB->begin())
          break;
        --BBI;
      }
      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
        continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor. If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
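      // For example (hypothetical IR), with BB and DestBB as below:
      //
      //   bb:                                 ; BB
      //     %a = phi i32 [ %x, %pred ]
      //     br label %dest
      //   dest:                               ; DestBB
      //     %r = phi i32 [ %a, %other ], [ %y, %bb ]
      //
      // %a flows into %r from a block other than BB, so merging BB into
      // DestBB could change which value %r selects. Bail out in that case: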
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block. If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes
/// and an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(errs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function. If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
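    // For illustration (hypothetical IR): if InVal is a phi in BB such as
    //   %v = phi i32 [ %a, %p1 ], [ %b, %p2 ]
    // then PN gains the entries [ %a, %p1 ] and [ %b, %p2 ]; if InVal instead
    // dominates BB, PN simply gains one copy of InVal for each predecessor
    // of BB.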
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  if (PI) {
    PI->replaceAllUses(BB, DestBB);
    PI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
  }
  BB->eraseFromParent();

  DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");
}


/// SplitEdgeNicely - Split the critical edge from TI to its specified
/// successor if it will improve codegen. We only do this if the successor has
/// phi nodes (otherwise critical edges are ok). If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
/// instead of introducing a new block.
static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
                            SmallSet<std::pair<const BasicBlock*,
                                     const BasicBlock*>, 8> &BackEdges,
                            Pass *P) {
  BasicBlock *TIBB = TI->getParent();
  BasicBlock *Dest = TI->getSuccessor(SuccNum);
  assert(isa<PHINode>(Dest->begin()) &&
         "This should only be called if Dest has a PHI!");

  // Do not split edges to EH landing pads.
  if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) {
    if (Invoke->getSuccessor(1) == Dest)
      return;
  }


  // As a hack, never split backedges of loops. Even though the copy for any
  // PHIs inserted on the backedge would be dead for exits from the loop, we
  // assume that the cost of *splitting* the backedge would be too high.
  if (BackEdges.count(std::make_pair(TIBB, Dest)))
    return;

  if (!FactorCommonPreds) {
    /// TIPHIValues - This array is lazily computed to determine the values of
    /// PHIs in Dest that TI would provide.
    SmallVector<Value*, 32> TIPHIValues;

    // Check to see if Dest has any blocks that can be used as a split edge for
    // this terminator.
    for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
      BasicBlock *Pred = *PI;
      // To be usable, the pred has to end with an uncond branch to the dest.
      BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
      if (!PredBr || !PredBr->isUnconditional())
        continue;
      // Must be empty other than the branch and debug info.
      BasicBlock::iterator I = Pred->begin();
      while (isa<DbgInfoIntrinsic>(I))
        I++;
      if (dyn_cast<Instruction>(I) != PredBr)
        continue;
      // Cannot be the entry block; its label does not get emitted.
      if (Pred == &(Dest->getParent()->getEntryBlock()))
        continue;

      // Finally, since we know that Dest has phi nodes in it, we have to make
      // sure that jumping to Pred will have the same effect as going to Dest in
      // terms of PHI values.
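      // For example (hypothetical IR): with
      //   %p = phi i32 [ 1, %tibb ], [ 1, %pred ], [ 2, %other ]
      // in Dest, retargeting TIBB's edge to Pred is safe: %p sees the same
      // value (1) whether control arrives via TIBB or via Pred.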
      PHINode *PN;
      unsigned PHINo = 0;
      bool FoundMatch = true;
      for (BasicBlock::iterator I = Dest->begin();
           (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
        if (PHINo == TIPHIValues.size())
          TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

        // If the PHI entry doesn't work, we can't use this pred.
        if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
          FoundMatch = false;
          break;
        }
      }

      // If we found a workable predecessor, change TI to branch to Pred
      // instead of Dest.
      if (FoundMatch) {
        ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
        if (PI)
          PI->splitEdge(TIBB, Dest, Pred);
        Dest->removePredecessor(TIBB);
        TI->setSuccessor(SuccNum, Pred);
        return;
      }
    }

    SplitCriticalEdge(TI, SuccNum, P, true);
    return;
  }

  PHINode *PN;
  SmallVector<Value*, 8> TIPHIValues;
  for (BasicBlock::iterator I = Dest->begin();
       (PN = dyn_cast<PHINode>(I)); ++I)
    TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

  SmallVector<BasicBlock*, 8> IdenticalPreds;
  for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (BackEdges.count(std::make_pair(Pred, Dest)))
      continue;
    if (Pred == TIBB)
      IdenticalPreds.push_back(Pred);
    else {
      bool Identical = true;
      unsigned PHINo = 0;
      for (BasicBlock::iterator I = Dest->begin();
           (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo)
        if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
          Identical = false;
          break;
        }
      if (Identical)
        IdenticalPreds.push_back(Pred);
    }
  }

  assert(!IdenticalPreds.empty());
  SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(),
                         ".critedge", P);
}


/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(CI->getType());

  // If this is an fp<->int conversion, it is not a noop copy; bail out.
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to. This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote)
    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
  if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote)
    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();
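
  // A sketch of the transformation (hypothetical IR, on a target where the
  // trunc is a noop copy after type promotion):
  //
  //   entry:
  //     %c = trunc i64 %x to i32
  //     br label %use
  //   use:
  //     %r = add i32 %c, 1
  //
  // The trunc is recreated in %use so that isel, working one block at a
  // time, need not carry %c across the block boundary in a separate
  // virtual register.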
  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in. For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      UserBB = PN->getIncomingBlock(UI);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmps - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCmp =
        CmpInst::Create(CI->getOpcode(),
                        CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}
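
// A sketch of the win above (hypothetical IR): given
//
//   bb1:
//     %c = icmp eq i32 %a, %b
//     br label %bb2
//   bb2:
//     br i1 %c, label %t, label %f
//
// recreating the icmp in bb2 lets isel fold the compare into the branch
// instead of materializing %c in a register and copying it across blocks.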
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeMemoryInst - Load and store instructions often have addressing
/// modes that can do significant amounts of computation. As such, instruction
/// selection will try to get the load or store to do as much computation as
/// possible for the program. The problem is that isel can only see within a
/// single block. As such, we sink as much legal addressing mode work into the
/// block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        const Type *AccessTy,
                                        DenseMap<Value*,Value*> &SunkAddrs) {
  // Figure out what addressing mode will be built up for this operation.
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
                                                      AddrModeInsts, *TLI);

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(errs() << "CGP: Found local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user. Since our caller is
  // scanning from the top of the BB to the bottom, reuses of the expr are
  // guaranteed to happen later.
  BasicBlock::iterator InsertPt = MemoryInst;

  // Now that we have determined the addressing expression we want to use, and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block. If so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(errs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
  } else {
    DEBUG(errs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    const Type *IntPtrTy =
          TLI->getTargetData()->getIntPtrType(AccessTy->getContext());

    Value *Result = 0;
    // Start with the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (isa<PointerType>(V->getType())) {
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else {
        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
      }
      if (AddrMode.Scale != 1)
        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
                                                          AddrMode.Scale),
                                      "sunkaddr", InsertPt);
      Result = V;
    }
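
    // The pieces built here and below reassemble the matched mode as plain
    // integer arithmetic (hypothetical IR, for BaseReg %base + 4*%scaled + 16
    // on a 64-bit target; names are invented for illustration):
    //
    //   %sunkaddr = mul i64 %scaled, 4
    //   %sunkaddr1 = ptrtoint i32* %base to i64
    //   %sunkaddr2 = add i64 %sunkaddr, %sunkaddr1
    //   %sunkaddr3 = add i64 %sunkaddr2, 16
    //   %sunkaddr4 = inttoptr i64 %sunkaddr3 to i32*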
    // Add in the base register.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (isa<PointerType>(V->getType()))
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      if (V->getType() != IntPtrTy)
        V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
                                        "sunkaddr", InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
                                  InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
  }

  MemoryInst->replaceUsesOfWith(Addr, SunkAddr);

  if (Addr->use_empty())
    RecursivelyDeleteTriviallyDeadInstructions(Addr);
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computations into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  bool MadeChange = false;
  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());

  // Do a prepass over the constraints, canonicalizing them, and building up
  // the ConstraintOperands list.
  std::vector<InlineAsm::ConstraintInfo>
    ConstraintInfos = IA->ParseConstraints();

  /// ConstraintOperands - Information about all of the constraints.
  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
    ConstraintOperands.
      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      if (OpInfo.isIndirect)
        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue(),
                             OpInfo.ConstraintType == TargetLowering::C_Memory);

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = OpInfo.CallOperandVal;
      MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
    }
  }

  return MadeChange;
}
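
// A sketch of the payoff of the next transformation (hypothetical IR): given
//
//   bb1:
//     %v = load i16* %p
//     br label %bb2
//   bb2:
//     %e = zext i16 %v to i32
//
// moving the zext up into bb1 lets SelectionDAG match a single
// zero-extending load (ISD::ZEXTLOAD) instead of emitting the load and the
// extend in separate blocks.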
/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
///
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
  // Look for a load being extended.
  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI) return false;

  // If they're already in the same block, there's nothing to do.
  if (LI->getParent() == I->getParent())
    return false;

  // If the load has other users and the truncate is not free, this probably
  // isn't worthwhile.
  if (!LI->hasOneUse() &&
      TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
    return false;

  // Check whether the target supports casts folded into loads.
  unsigned LType;
  if (isa<ZExtInst>(I))
    LType = ISD::ZEXTLOAD;
  else {
    assert(isa<SExtInst>(I) && "Unexpected ext type!");
    LType = ISD::SEXTLOAD;
  }
  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
    return false;

  // Move the extend into the same block as the load, so that SelectionDAG
  // can fold it.
  I->removeFromParent();
  I->insertAfter(LI);
  return true;
}

bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If both the result of the {s|z}ext and its source are live out, rewrite
  // all other uses of the source with the result of the extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }
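
    // For example (hypothetical IR): if %src and "%e = zext i32 %src to i64"
    // are both live out of DefBB, out-of-block uses of %src become
    // "trunc i64 %e to i32", so only %e needs to live across the block
    // boundary.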
    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;

    MadeChange = true;
  }

  return MadeChange;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Split all critical edges where the dest block has a PHI.
  TerminatorInst *BBTI = BB.getTerminator();
  if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
      BasicBlock *SuccBB = BBTI->getSuccessor(i);
      if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
        SplitEdgeNicely(BBTI, i, BackEdges, this);
    }
  }

  // Keep track of non-local addresses that have been sunk into this block.
  // This allows us to avoid inserting duplicate code for blocks with multiple
  // load/stores of the same address.
  DenseMap<Value*, Value*> SunkAddrs;

  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
    Instruction *I = BBI++;

    if (CastInst *CI = dyn_cast<CastInst>(I)) {
      // If the source of the cast is a constant, then this should have
      // already been constant folded. The only reason NOT to constant fold
      // it is if something (e.g. LSR) was careful to place the constant
      // evaluation in a block other than the one that uses it (e.g. to hoist
      // the address of globals out of a loop). If this is the case, we don't
      // want to forward-subst the cast.
      if (isa<Constant>(CI->getOperand(0)))
        continue;

      bool Change = false;
      if (TLI) {
        Change = OptimizeNoopCopyExpression(CI, *TLI);
        MadeChange |= Change;
      }

      if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
        MadeChange |= MoveExtToFormExtLoad(I);
        MadeChange |= OptimizeExtUses(I);
      }
    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
      MadeChange |= OptimizeCmpExpression(CI);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
                                         SunkAddrs);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
                                         SI->getOperand(0)->getType(),
                                         SunkAddrs);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      if (GEPI->hasAllZeroIndices()) {
        /// The GEP operand must be a pointer, so must its result -> BitCast
        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                          GEPI->getName(), GEPI);
        GEPI->replaceAllUsesWith(NC);
        GEPI->eraseFromParent();
        MadeChange = true;
        BBI = NC;
      }
    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
      // If we found an inline asm expression, and if the target knows how to
      // lower it to normal LLVM code, do so now.
      if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
        if (TLI->ExpandInlineAsm(CI)) {
          BBI = BB.begin();
          // Avoid processing instructions out of order, which could cause
          // reuse before a value is defined.
          SunkAddrs.clear();
        } else
          // Sink address computing for memory operands into the block.
          MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
      }
    }
  }

  return MadeChange;
}
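
// Usage note (illustrative, not part of the original file): the RegisterPass
// entry above exposes this pass to the opt tool, so it can be exercised in
// isolation, e.g.:
//
//   opt -codegenprepare input.bc -o output.bc
//
// In normal compilation it is instead added by the target's codegen pipeline
// via createCodeGenPreparePass(), passing the target's TargetLowering.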