CodeGenPrepare.cpp revision ceb4d1aecb9deffe59b3dcdc9a783ffde8477be9
//===- CodeGenPrepare.cpp - Prepare a function for code generation -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",
                                       cl::init(false), cl::Hidden);

namespace {
  class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer of a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;

    /// BackEdges - Keep a set of all the loop back edges.
    ///
    SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> BackEdges;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(&ID), TLI(tli) {}
    bool runOnFunction(Function &F);

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
                            DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                               DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeExtUses(Instruction *I);
    void findLoopBackEdges(Function &F);
  };
}

char CodeGenPrepare::ID = 0;
static RegisterPass<CodeGenPrepare> X("codegenprepare",
                                      "Optimize for code generation");

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}

/// findLoopBackEdges - Do a DFS walk to find loop back edges.
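/// A back edge here is any edge whose destination is still on the DFS stack,
/// e.g. (illustrative) the branch from a loop latch back to its loop header.
/// These edges are recorded so that SplitEdgeNicely can avoid splitting them
/// later.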
///
void CodeGenPrepare::findLoopBackEdges(Function &F) {
  SmallPtrSet<BasicBlock*, 8> Visited;
  SmallVector<std::pair<BasicBlock*, succ_iterator>, 8> VisitStack;
  SmallPtrSet<BasicBlock*, 8> InStack;

  BasicBlock *BB = &F.getEntryBlock();
  if (succ_begin(BB) == succ_end(BB))
    return;
  Visited.insert(BB);
  VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
  InStack.insert(BB);
  do {
    std::pair<BasicBlock*, succ_iterator> &Top = VisitStack.back();
    BasicBlock *ParentBB = Top.first;
    succ_iterator &I = Top.second;

    bool FoundNew = false;
    while (I != succ_end(ParentBB)) {
      BB = *I++;
      if (Visited.insert(BB)) {
        FoundNew = true;
        break;
      }
      // Successor is in VisitStack, it's a back edge.
      if (InStack.count(BB))
        BackEdges.insert(std::make_pair(ParentBB, BB));
    }

    if (FoundNew) {
      // Go down one level if there is an unvisited successor.
      InStack.insert(BB);
      VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
    } else {
      // Go up one level.
      std::pair<BasicBlock*, succ_iterator> &Pop = VisitStack.back();
      InStack.erase(Pop.first);
      VisitStack.pop_back();
    }
  } while (!VisitStack.empty());
}


bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  // Now find loop back edges.
  findLoopBackEdges(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= OptimizeBlock(*BB);
    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes
/// and an unconditional branch.  Passes before isel (e.g. LSR/loopsimplify)
/// often split edges in ways that are non-optimal for isel.  Start by
/// eliminating these blocks so we can split them the way we want them.
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch isn't a phi node, then other stuff
    // is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      if (!isa<PHINode>(BBI)) continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block.  If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes
/// and an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB;

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function.  If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  BB->eraseFromParent();

  DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
}


/// SplitEdgeNicely - Split the critical edge from TI to its specified
/// successor if it will improve codegen.  We only do this if the successor has
/// phi nodes (otherwise critical edges are ok).  If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
/// instead of introducing a new block.
static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
                     SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> &BackEdges,
                            Pass *P) {
  BasicBlock *TIBB = TI->getParent();
  BasicBlock *Dest = TI->getSuccessor(SuccNum);
  assert(isa<PHINode>(Dest->begin()) &&
         "This should only be called if Dest has a PHI!");

  // As a hack, never split backedges of loops.  Even though the copy for any
  // PHIs inserted on the backedge would be dead for exits from the loop, we
  // assume that the cost of *splitting* the backedge would be too high.
  if (BackEdges.count(std::make_pair(TIBB, Dest)))
    return;

  if (!FactorCommonPreds) {
    /// TIPHIValues - This array is lazily computed to determine the values of
    /// PHIs in Dest that TI would provide.
    SmallVector<Value*, 32> TIPHIValues;

    // Check to see if Dest has any blocks that can be used as a split edge for
    // this terminator.
    for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
      BasicBlock *Pred = *PI;
      // To be usable, the pred has to end with an uncond branch to the dest.
      BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
      if (!PredBr || !PredBr->isUnconditional() ||
          // Must be empty other than the branch.
          &Pred->front() != PredBr ||
          // Cannot be the entry block; its label does not get emitted.
          Pred == &(Dest->getParent()->getEntryBlock()))
        continue;

      // Finally, since we know that Dest has phi nodes in it, we have to make
      // sure that jumping to Pred will have the same effect as going to Dest in
      // terms of PHI values.
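      // Illustrative example (names hypothetical): if Dest contains
      //   %p = phi i32 [ %v, %TIBB ], [ %v, %Pred ], [ %w, %Other ]
      // then Pred supplies exactly the same PHI values as TIBB does, so TIBB
      // can simply branch to Pred instead of splitting the critical edge.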
      PHINode *PN;
      unsigned PHINo = 0;
      bool FoundMatch = true;
      for (BasicBlock::iterator I = Dest->begin();
           (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
        if (PHINo == TIPHIValues.size())
          TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

        // If the PHI entry doesn't work, we can't use this pred.
        if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
          FoundMatch = false;
          break;
        }
      }

      // If we found a workable predecessor, change TI to branch to Pred.
      if (FoundMatch) {
        Dest->removePredecessor(TIBB);
        TI->setSuccessor(SuccNum, Pred);
        return;
      }
    }

    SplitCriticalEdge(TI, SuccNum, P, true);
    return;
  }

  PHINode *PN;
  SmallVector<Value*, 8> TIPHIValues;
  for (BasicBlock::iterator I = Dest->begin();
       (PN = dyn_cast<PHINode>(I)); ++I)
    TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

  SmallVector<BasicBlock*, 8> IdenticalPreds;
  for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (BackEdges.count(std::make_pair(Pred, Dest)))
      continue;
    if (PI == TIBB)
      IdenticalPreds.push_back(Pred);
    else {
      bool Identical = true;
      unsigned PHINo = 0;
      for (BasicBlock::iterator I = Dest->begin();
           (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo)
        if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
          Identical = false;
          break;
        }
      if (Identical)
        IdenticalPreds.push_back(Pred);
    }
  }

  assert(!IdenticalPreds.empty());
  SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(),
                         ".critedge", P);
}


/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, int->uint, or
/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  MVT DstVT = TLI.getValueType(CI->getType());

  // Is this an fp<->int conversion?
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to.  This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
    SrcVT = TLI.getTypeToTransformTo(SrcVT);
  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
    DstVT = TLI.getTypeToTransformTo(DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in.  For PHI's this is the
    // appropriate predecessor block.
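    // (A PHI's use of a value conceptually occurs at the end of the
    // corresponding incoming block, so the cast is sunk there rather than
    // into the block containing the PHI itself.)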
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      unsigned OpVal = UI.getOperandNo()/2;
      UserBB = PN->getIncomingBlock(OpVal);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced.  This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmp - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCmp =
        CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

//===----------------------------------------------------------------------===//
// Addressing Mode Analysis and Optimization
//===----------------------------------------------------------------------===//

namespace {
  /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
  /// which holds actual Value*'s for register values.
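  /// For example (illustrative), an address like [%base + 4*%idx + 20] would
  /// be held with BaseReg = %base, ScaledReg = %idx, Scale = 4 and
  /// BaseOffs = 20; see ExtAddrMode::print below for the exact format used.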
  struct ExtAddrMode : public TargetLowering::AddrMode {
    Value *BaseReg;
    Value *ScaledReg;
    ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
    void print(OStream &OS) const;
    void dump() const {
      print(cerr);
      cerr << '\n';
    }
  };
} // end anonymous namespace

static inline OStream &operator<<(OStream &OS, const ExtAddrMode &AM) {
  AM.print(OS);
  return OS;
}

void ExtAddrMode::print(OStream &OS) const {
  bool NeedPlus = false;
  OS << "[";
  if (BaseGV)
    OS << (NeedPlus ? " + " : "")
       << "GV:%" << BaseGV->getName(), NeedPlus = true;

  if (BaseOffs)
    OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;

  if (BaseReg)
    OS << (NeedPlus ? " + " : "")
       << "Base:%" << BaseReg->getName(), NeedPlus = true;
  if (Scale)
    OS << (NeedPlus ? " + " : "")
       << Scale << "*%" << ScaledReg->getName(), NeedPlus = true;

  OS << ']';
}

namespace {
/// AddressingModeMatcher - This class exposes a single public method, which is
/// used to construct a "maximal munch" of the addressing mode for the target
/// specified by TLI for an access to "V" with an access type of AccessTy.  This
/// returns the addressing mode that is actually matched by value, but also
/// returns the list of instructions involved in that addressing computation in
/// AddrModeInsts.
class AddressingModeMatcher {
  SmallVectorImpl<Instruction*> &AddrModeInsts;
  const TargetLowering &TLI;

  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
  /// the memory instruction that we're computing this address for.
  const Type *AccessTy;
  Instruction *MemoryInst;

  /// AddrMode - This is the addressing mode that we're building up.  This is
  /// part of the return value of this addressing mode matching stuff.
  ExtAddrMode &AddrMode;

  /// IgnoreProfitability - This is set to true when we should not do
  /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
  /// always returns true.
  bool IgnoreProfitability;

  AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
                        const TargetLowering &T, const Type *AT,
                        Instruction *MI, ExtAddrMode &AM)
    : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
    IgnoreProfitability = false;
  }
public:

  /// Match - Find the maximal addressing mode that a load/store of V can fold,
  /// given an access type of AccessTy.  This returns a list of involved
  /// instructions in AddrModeInsts.
  static ExtAddrMode Match(Value *V, const Type *AccessTy,
                           Instruction *MemoryInst,
                           SmallVectorImpl<Instruction*> &AddrModeInsts,
                           const TargetLowering &TLI) {
    ExtAddrMode Result;

    bool Success =
      AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
                            MemoryInst, Result).MatchAddr(V, 0);
    Success = Success; assert(Success && "Couldn't select *anything*?");
    return Result;
  }
private:
  bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
  bool MatchAddr(Value *V, unsigned Depth);
  bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
  bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
                                            ExtAddrMode &AMBefore,
                                            ExtAddrMode &AMAfter);
  bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
};
} // end anonymous namespace

/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
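/// For example (illustrative), matching a scale of 4 for %idx against an
/// existing mode of [Base:%p] would yield [Base:%p + 4*%idx], provided the
/// target reports that combination as a legal addressing mode.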
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
                                             unsigned Depth) {
  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
  // mode.  Just process that directly.
  if (Scale == 1)
    return MatchAddr(ScaleReg, Depth);

  // If the scale is 0, it takes nothing to add this.
  if (Scale == 0)
    return true;

  // If we already have a scale of this value, we can add to it, otherwise, we
  // need an available scale field.
  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
    return false;

  ExtAddrMode TestAddrMode = AddrMode;

  // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
  // [A+B + A*7] -> [B+A*8].
  TestAddrMode.Scale += Scale;
  TestAddrMode.ScaledReg = ScaleReg;

  // If the new address isn't legal, bail out.
  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
    return false;

  // It was legal, so commit it.
  AddrMode = TestAddrMode;

  // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
  // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
  // X*Scale + C*Scale to addr mode.
  ConstantInt *CI; Value *AddLHS;
  if (match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
    TestAddrMode.ScaledReg = AddLHS;
    TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;

    // If this addressing mode is legal, commit it and remember that we folded
    // this instruction.
    if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
      AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
      AddrMode = TestAddrMode;
      return true;
    }
  }

  // Otherwise, not (x+c)*scale, just return what we have.
  return true;
}

/// MightBeFoldableInst - This is a little filter, which returns true if an
/// addressing computation involving I might be folded into a load/store
/// accessing it.  This doesn't need to be perfect, but needs to accept at least
/// the set of instructions that MatchOperationAddr can.
static bool MightBeFoldableInst(Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::BitCast:
    // Don't touch identity bitcasts.
    if (I->getType() == I->getOperand(0)->getType())
      return false;
    return isa<PointerType>(I->getType()) || isa<IntegerType>(I->getType());
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return true;
  case Instruction::IntToPtr:
    // We know the input is intptr_t, so this is foldable.
    return true;
  case Instruction::Add:
    return true;
  case Instruction::Mul:
  case Instruction::Shl:
    // Can only handle X*C and X << C.
    return isa<ConstantInt>(I->getOperand(1));
  case Instruction::GetElementPtr:
    return true;
  default:
    return false;
  }
}


/// MatchOperationAddr - Given an instruction or constant expr, see if we can
/// fold the operation into the addressing mode.  If so, update the addressing
/// mode and return true, otherwise return false without modifying AddrMode.
bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
                                               unsigned Depth) {
  // Avoid exponential behavior on extremely deep expression trees.
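  // (Each Add below may try both operand orders, so without this cap the
  // matcher could revisit an exponential number of subexpressions.)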
  if (Depth >= 5) return false;

  switch (Opcode) {
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return MatchAddr(AddrInst->getOperand(0), Depth);
  case Instruction::IntToPtr:
    // This inttoptr is a no-op if the integer type is pointer sized.
    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
        TLI.getPointerTy())
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::BitCast:
    // BitCast is always a noop, and we can handle it as long as it is
    // int->int or pointer->pointer (we don't want int<->fp or something).
    if ((isa<PointerType>(AddrInst->getOperand(0)->getType()) ||
         isa<IntegerType>(AddrInst->getOperand(0)->getType())) &&
        // Don't touch identity bitcasts.  These were probably put here by LSR,
        // and we don't want to mess around with them.  Assume it knows what it
        // is doing.
        AddrInst->getOperand(0)->getType() != AddrInst->getType())
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::Add: {
    // Check to see if we can merge in the RHS then the LHS.  If so, we win.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();
    if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
        MatchAddr(AddrInst->getOperand(0), Depth+1))
      return true;

    // Restore the old addr mode info.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);

    // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
        MatchAddr(AddrInst->getOperand(1), Depth+1))
      return true;

    // Otherwise we definitely can't merge the ADD in.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);
    break;
  }
  //case Instruction::Or:
  //  TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
  //break;
  case Instruction::Mul:
  case Instruction::Shl: {
    // Can only handle X*C and X << C.
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS) return false;
    int64_t Scale = RHS->getSExtValue();
    if (Opcode == Instruction::Shl)
      Scale = 1 << Scale;

    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
  }
  case Instruction::GetElementPtr: {
    // Scan the GEP.  We check whether it contains constant offsets and at most
    // one variable offset.
    int VariableOperand = -1;
    unsigned VariableScale = 0;

    int64_t ConstantOffset = 0;
    const TargetData *TD = TLI.getTargetData();
    gep_type_iterator GTI = gep_type_begin(AddrInst);
    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD->getStructLayout(STy);
        unsigned Idx =
          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
        ConstantOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t TypeSize = TD->getTypePaddedSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
          ConstantOffset += CI->getSExtValue()*TypeSize;
        } else if (TypeSize) {  // Scales of zero don't do anything.
          // We only allow one variable index at the moment.
          if (VariableOperand != -1)
            return false;

          // Remember the variable index.
          VariableOperand = i;
          VariableScale = TypeSize;
        }
      }
    }

    // A common case is for the GEP to only do a constant offset.  In this case,
    // just add it to the disp field and check validity.
    if (VariableOperand == -1) {
      AddrMode.BaseOffs += ConstantOffset;
      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
        // Check to see if we can fold the base pointer in too.
        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
          return true;
      }
      AddrMode.BaseOffs -= ConstantOffset;
      return false;
    }

    // Save the valid addressing mode in case we can't match.
    ExtAddrMode BackupAddrMode = AddrMode;

    // Check that this has no base reg yet.  If so, we won't have a place to
    // put the base of the GEP (assuming it is not a null ptr).
    bool SetBaseReg = true;
    if (isa<ConstantPointerNull>(AddrInst->getOperand(0)))
      SetBaseReg = false;   // null pointer base doesn't need representation.
    else if (AddrMode.HasBaseReg)
      return false;  // Base register already specified, can't match GEP.
    else {
      // Otherwise, we'll use the GEP base as the BaseReg.
      AddrMode.HasBaseReg = true;
      AddrMode.BaseReg = AddrInst->getOperand(0);
    }

    // See if the scale and offset amount is valid for this target.
    AddrMode.BaseOffs += ConstantOffset;

    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
                          Depth)) {
      AddrMode = BackupAddrMode;
      return false;
    }

    // If we have a null as the base of the GEP, folding in the constant offset
    // plus variable scale is all we can do.
    if (!SetBaseReg) return true;

    // If this match succeeded, we know that we can form an address with the
    // GepBase as the basereg.  Match the base pointer of the GEP more
    // aggressively by zeroing out BaseReg and rematching.  If the base is
    // (for example) another GEP, this allows merging in that other GEP into
    // the addressing mode we're forming.
    AddrMode.HasBaseReg = false;
    AddrMode.BaseReg = 0;
    bool Success = MatchAddr(AddrInst->getOperand(0), Depth+1);
    assert(Success && "MatchAddr should be able to fill in BaseReg!");
    Success=Success;
    return true;
  }
  }
  return false;
}

/// MatchAddr - If we can, try to add the value of 'Addr' into the current
/// addressing mode.  If Addr can't be added to AddrMode this returns false and
/// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
/// or intptr_t for the target.
///
bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
    // Fold in immediates if legal for the target.
    AddrMode.BaseOffs += CI->getSExtValue();
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.BaseOffs -= CI->getSExtValue();
  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
    // If this is a global variable, try to fold it into the addressing mode.
    if (AddrMode.BaseGV == 0) {
      AddrMode.BaseGV = GV;
      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
        return true;
      AddrMode.BaseGV = 0;
    }
  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();

    // Check to see if it is possible to fold this operation.
    if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
      // Okay, it's possible to fold this.  Check to see if it is actually
      // *profitable* to do so.  We use a simple cost model to avoid increasing
      // register pressure too much.
      if (I->hasOneUse() ||
          IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
        AddrModeInsts.push_back(I);
        return true;
      }

      // It isn't profitable to do this, roll back.
      //cerr << "NOT FOLDING: " << *I;
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
    }
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
    if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
      return true;
  } else if (isa<ConstantPointerNull>(Addr)) {
    // Null pointer gets folded without affecting the addressing mode.
    return true;
  }

  // Worst case, the target should support [reg] addressing modes. :)
  if (!AddrMode.HasBaseReg) {
    AddrMode.HasBaseReg = true;
    AddrMode.BaseReg = Addr;
    // Still check for legality in case the target supports [imm] but not [i+r].
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.HasBaseReg = false;
    AddrMode.BaseReg = 0;
  }

  // If the base register is already taken, see if we can do [r+r].
  if (AddrMode.Scale == 0) {
    AddrMode.Scale = 1;
    AddrMode.ScaledReg = Addr;
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.Scale = 0;
    AddrMode.ScaledReg = 0;
  }
  // Couldn't match.
  return false;
}


/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
/// inline asm call are due to memory operands.  If so, return true, otherwise
/// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
                                    const TargetLowering &TLI) {
  std::vector<InlineAsm::ConstraintInfo>
    Constraints = IA->ParseConstraints();

  unsigned ArgNo = 1;   // ArgNo - The operand of the CallInst.
  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      if (OpInfo.isIndirect)
        OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Compute the constraint code and ConstraintType to use.
    TLI.ComputeConstraintToUse(OpInfo, SDValue(),
                             OpInfo.ConstraintType == TargetLowering::C_Memory);

    // If this asm operand is our Value*, and if it isn't an indirect memory
    // operand, we can't fold it!
    if (OpInfo.CallOperandVal == OpVal &&
        (OpInfo.ConstraintType != TargetLowering::C_Memory ||
         !OpInfo.isIndirect))
      return false;
  }

  return true;
}


/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
/// memory use.  If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(Instruction *I,
                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
                              const TargetLowering &TLI) {
  // If we already considered this instruction, we're done.
  if (!ConsideredInsts.insert(I))
    return false;

  // If this is an obviously unfoldable instruction, bail out.
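  // (A 'true' return from this function means an unfoldable use was found,
  // which in turn makes IsProfitableToFoldIntoAddressingMode give up.)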
  if (!MightBeFoldableInst(I))
    return true;

  // Loop over all the uses, recursively processing them.
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
      MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
      if (UI.getOperandNo() == 0) return true; // Storing addr, not into addr.
      MemoryUses.push_back(std::make_pair(SI, UI.getOperandNo()));
      continue;
    }

    if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
      if (IA == 0) return true;

      // If this is a memory operand, we're cool, otherwise bail out.
      if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
        return true;
      continue;
    }

    if (FindAllMemoryUses(cast<Instruction>(*UI), MemoryUses, ConsideredInsts,
                          TLI))
      return true;
  }

  return false;
}


/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
/// the use site that we're folding it into.  If so, there is no cost to
/// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
/// that we know are live at the instruction already.
bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
                                                   Value *KnownLive2) {
  // If Val is either of the known-live values, we know it is live!
  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
    return true;

  // All values other than instructions and arguments (e.g. constants) are live.
  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;

  // If Val is a constant sized alloca in the entry block, it is live; this is
  // true because it is just a reference to the stack/frame pointer, which is
  // live for the whole function.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
    if (AI->isStaticAlloca())
      return true;

  // Check to see if this value is already used in the memory instruction's
  // block.  If so, it's already live into the block at the very least, so we
  // can reasonably fold it.
  BasicBlock *MemBB = MemoryInst->getParent();
  for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
       UI != E; ++UI)
    // We know that uses of arguments and instructions have to be instructions.
    if (cast<Instruction>(*UI)->getParent() == MemBB)
      return true;

  return false;
}



/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
/// mode of the machine to fold the specified instruction into a load or store
/// that ultimately uses it.  However, the specified instruction has multiple
/// uses.  Given this, it may actually increase register pressure to fold it
/// into the load.  For example, consider this code:
///
///     X = ...
///     Y = X+1
///     use(Y)   -> nonload/store
///     Z = Y+1
///     load Z
///
/// In this case, Y has multiple uses, and can be folded into the load of Z
/// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
/// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
/// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
/// number of computations either.
1108/// 1109/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If 1110/// X was live across 'load Z' for other reasons, we actually *would* want to 1111/// fold the addressing mode in the Z case. This would make Y die earlier. 1112bool AddressingModeMatcher:: 1113IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, 1114 ExtAddrMode &AMAfter) { 1115 if (IgnoreProfitability) return true; 1116 1117 // AMBefore is the addressing mode before this instruction was folded into it, 1118 // and AMAfter is the addressing mode after the instruction was folded. Get 1119 // the set of registers referenced by AMAfter and subtract out those 1120 // referenced by AMBefore: this is the set of values which folding in this 1121 // address extends the lifetime of. 1122 // 1123 // Note that there are only two potential values being referenced here, 1124 // BaseReg and ScaleReg (global addresses are always available, as are any 1125 // folded immediates). 1126 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; 1127 1128 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their 1129 // lifetime wasn't extended by adding this instruction. 1130 if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 1131 BaseReg = 0; 1132 if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) 1133 ScaledReg = 0; 1134 1135 // If folding this instruction (and it's subexprs) didn't extend any live 1136 // ranges, we're ok with it. 1137 if (BaseReg == 0 && ScaledReg == 0) 1138 return true; 1139 1140 // If all uses of this instruction are ultimately load/store/inlineasm's, 1141 // check to see if their addressing modes will include this instruction. If 1142 // so, we can fold it into all uses, so it doesn't matter if it has multiple 1143 // uses. 1144 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; 1145 SmallPtrSet<Instruction*, 16> ConsideredInsts; 1146 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) 1147 return false; // Has a non-memory, non-foldable use! 1148 1149 // Now that we know that all uses of this instruction are part of a chain of 1150 // computation involving only operations that could theoretically be folded 1151 // into a memory use, loop over each of these uses and see if they could 1152 // *actually* fold the instruction. 1153 SmallVector<Instruction*, 32> MatchedAddrModeInsts; 1154 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { 1155 Instruction *User = MemoryUses[i].first; 1156 unsigned OpNo = MemoryUses[i].second; 1157 1158 // Get the access type of this use. If the use isn't a pointer, we don't 1159 // know what it accesses. 1160 Value *Address = User->getOperand(OpNo); 1161 if (!isa<PointerType>(Address->getType())) 1162 return false; 1163 const Type *AddressAccessTy = 1164 cast<PointerType>(Address->getType())->getElementType(); 1165 1166 // Do a match against the root of this address, ignoring profitability. This 1167 // will tell us if the addressing mode for the memory operation will 1168 // *actually* cover the shared instruction. 1169 ExtAddrMode Result; 1170 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, 1171 MemoryInst, Result); 1172 Matcher.IgnoreProfitability = true; 1173 bool Success = Matcher.MatchAddr(Address, 0); 1174 Success = Success; assert(Success && "Couldn't select *anything*?"); 1175 1176 // If the match didn't cover I, then it won't be shared by it. 
    if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
                  I) == MatchedAddrModeInsts.end())
      return false;

    MatchedAddrModeInsts.clear();
  }

  return true;
}


//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

/// IsNonLocalValue - Return true if the specified values are defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeMemoryInst - Load and store instructions often have
/// addressing modes that can do significant amounts of computation.  As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program.  The problem is that isel can only
/// see within a single block.  As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        const Type *AccessTy,
                                        DenseMap<Value*,Value*> &SunkAddrs) {
  // Figure out what addressing mode will be built up for this operation.
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
                                                      AddrModeInsts, *TLI);

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(cerr << "CGP: Found      local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user.  Since our caller is
  // scanning from the top of the BB to the bottom, reuse of the expr is
  // guaranteed to happen later.
  BasicBlock::iterator InsertPt = MemoryInst;

  // Now that we've determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block.  If so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << "\n");
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
  } else {
    DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << "\n");
    const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType();

    Value *Result = 0;
    // Start with the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (isa<PointerType>(V->getType())) {
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else {
        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
      }
      if (AddrMode.Scale != 1)
        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
                                                          AddrMode.Scale),
                                      "sunkaddr", InsertPt);
      Result = V;
    }

    // Add in the base register.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType() != IntPtrTy)
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
                                  InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
  }

  MemoryInst->replaceUsesOfWith(Addr, SunkAddr);

  if (Addr->use_empty())
    RecursivelyDeleteTriviallyDeadInstructions(Addr);
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  bool MadeChange = false;
  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  std::vector<InlineAsm::ConstraintInfo>
    ConstraintInfos = IA->ParseConstraints();

  /// ConstraintOperands - Information about all of the constraints.
  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
    ConstraintOperands.
      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      if (OpInfo.isIndirect)
        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue(),
                             OpInfo.ConstraintType == TargetLowering::C_Memory);

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = OpInfo.CallOperandVal;
      MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
    }
  }

  return MadeChange;
}

bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If both the result of the {s|z}xt and its source are live out, rewrite all
  // other uses of the source with result of extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;

    MadeChange = true;
  }

  return MadeChange;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Split all critical edges where the dest block has a PHI.
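  // (Splitting makes the copies isel emits for those PHIs land in a block
  // dedicated to this edge, rather than in a predecessor that also branches
  // elsewhere; SplitEdgeNicely may instead reuse an existing empty
  // predecessor when the PHI values allow it.)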
  TerminatorInst *BBTI = BB.getTerminator();
  if (BBTI->getNumSuccessors() > 1) {
    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
      BasicBlock *SuccBB = BBTI->getSuccessor(i);
      if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
        SplitEdgeNicely(BBTI, i, BackEdges, this);
    }
  }

  // Keep track of non-local addresses that have been sunk into this block.
  // This allows us to avoid inserting duplicate code for blocks with multiple
  // load/stores of the same address.
  DenseMap<Value*, Value*> SunkAddrs;

  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
    Instruction *I = BBI++;

    if (CastInst *CI = dyn_cast<CastInst>(I)) {
      // If the source of the cast is a constant, then this should have
      // already been constant folded.  The only reason NOT to constant fold
      // it is if something (e.g. LSR) was careful to place the constant
      // evaluation in a block other than the one that uses it (e.g. to hoist
      // the address of globals out of a loop).  If this is the case, we don't
      // want to forward-subst the cast.
      if (isa<Constant>(CI->getOperand(0)))
        continue;

      bool Change = false;
      if (TLI) {
        Change = OptimizeNoopCopyExpression(CI, *TLI);
        MadeChange |= Change;
      }

      if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I)))
        MadeChange |= OptimizeExtUses(I);
    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
      MadeChange |= OptimizeCmpExpression(CI);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
                                         SunkAddrs);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
                                         SI->getOperand(0)->getType(),
                                         SunkAddrs);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      if (GEPI->hasAllZeroIndices()) {
        /// The GEP operand must be a pointer, so must its result -> BitCast
        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                          GEPI->getName(), GEPI);
        GEPI->replaceAllUsesWith(NC);
        GEPI->eraseFromParent();
        MadeChange = true;
        BBI = NC;
      }
    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
      // If we found an inline asm expression, and if the target knows how to
      // lower it to normal LLVM code, do so now.
      if (TLI && isa<InlineAsm>(CI->getCalledValue()))
        if (const TargetAsmInfo *TAI =
              TLI->getTargetMachine().getTargetAsmInfo()) {
          if (TAI->ExpandInlineAsm(CI))
            BBI = BB.begin();
          else
            // Sink address computing for memory operands into the block.
            MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
        }
    }
  }

  return MadeChange;
}
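// Usage note (illustrative): targets typically schedule this pass from
// LLVMTargetMachine shortly before instruction selection, roughly as
//   PM.add(createCodeGenPreparePass(getTargetLowering()));
// so it runs on the IR immediately prior to SelectionDAG construction.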