CodeGenPrepare.cpp revision 896617b776e7b015346160645b19be776cbe3805
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

namespace {
  class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(&ID), TLI(tli) {}
    bool runOnFunction(Function &F);

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
                            DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                               DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeExtUses(Instruction *I);
  };
}

char CodeGenPrepare::ID = 0;
static RegisterPass<CodeGenPrepare> X("codegenprepare",
                                      "Optimize for code generation");

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}


bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= OptimizeBlock(*BB);
    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes
/// and an unconditional branch.  Passes before isel (e.g. LSR/loopsimplify)
/// often split edges in ways that are non-optimal for isel.  Start by
/// eliminating these blocks so we can split them the way we want them.
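///
/// For illustration (hypothetical IR, not taken from a real test case), a
/// "mostly empty" block looks like:
///
///     bb:                                 ; preds = %entry
///       %val = phi i32 [ %x, %entry ]
///       br label %dest
///     dest:                               ; preds = %bb, %other
///       %res = phi i32 [ %val, %bb ], [ %y, %other ]
///
/// After merging, %res takes %x directly from %entry and the extra block (and
/// the copy its label would force at codegen time) disappears, assuming
/// CanMergeBlocks found no conflicting incoming values from a shared
/// predecessor.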
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch isn't a phi node, then other stuff
    // is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      if (!isa<PHINode>(BBI)) continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block.  If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes
/// and an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB;

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (DestBB->getSinglePredecessor()) {
    // If DestBB has single-entry PHI nodes, fold them.
    while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
      Value *NewVal = PN->getIncomingValue(0);
      // Replace self referencing PHI with undef, it must be dead.
      if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
      PN->replaceAllUsesWith(NewVal);
      PN->eraseFromParent();
    }

    // Splice all the PHI nodes from BB over to DestBB.
    DestBB->getInstList().splice(DestBB->begin(), BB->getInstList(),
                                 BB->begin(), BI);

    // Anything that branched to BB now branches to DestBB.
    BB->replaceAllUsesWith(DestBB);

    // Nuke BB.
    BB->eraseFromParent();

    DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
    return;
  }

  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  BB->eraseFromParent();

  DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
}


/// SplitEdgeNicely - Split the critical edge from TI to its specified
/// successor if it will improve codegen.  We only do this if the successor has
/// phi nodes (otherwise critical edges are ok).  If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
/// instead of introducing a new block.
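///
/// As a sketch of the reuse case (hypothetical CFG, not from the source): if
/// TIBB ends in "br i1 %c, label %dest, label %other", %dest already has a
/// predecessor %pre containing nothing but "br label %dest", and every PHI in
/// %dest has the same incoming value for %pre as it does for TIBB, then the
/// edge is simply retargeted to %pre instead of introducing a new block on the
/// critical edge.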
static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) {
  BasicBlock *TIBB = TI->getParent();
  BasicBlock *Dest = TI->getSuccessor(SuccNum);
  assert(isa<PHINode>(Dest->begin()) &&
         "This should only be called if Dest has a PHI!");

  // As a hack, never split backedges of loops.  Even though the copy for any
  // PHIs inserted on the backedge would be dead for exits from the loop, we
  // assume that the cost of *splitting* the backedge would be too high.
  if (Dest == TIBB)
    return;

  /// TIPHIValues - This array is lazily computed to determine the values of
  /// PHIs in Dest that TI would provide.
  SmallVector<Value*, 32> TIPHIValues;

  // Check to see if Dest has any blocks that can be used as a split edge for
  // this terminator.
  for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    // To be usable, the pred has to end with an uncond branch to the dest.
    BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
    if (!PredBr || !PredBr->isUnconditional() ||
        // Must be empty other than the branch.
        &Pred->front() != PredBr ||
        // Cannot be the entry block; its label does not get emitted.
        Pred == &(Dest->getParent()->getEntryBlock()))
      continue;

    // Finally, since we know that Dest has phi nodes in it, we have to make
    // sure that jumping to Pred will have the same effect as going to Dest in
    // terms of PHI values.
    PHINode *PN;
    unsigned PHINo = 0;
    bool FoundMatch = true;
    for (BasicBlock::iterator I = Dest->begin();
         (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
      if (PHINo == TIPHIValues.size())
        TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

      // If the PHI entry doesn't work, we can't use this pred.
      if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
        FoundMatch = false;
        break;
      }
    }

    // If we found a workable predecessor, change TI to branch to Succ.
    if (FoundMatch) {
      Dest->removePredecessor(TIBB);
      TI->setSuccessor(SuccNum, Pred);
      return;
    }
  }

  SplitCriticalEdge(TI, SuccNum, P, true);
}

/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, int->uint, or
/// int->sbyte on PPC), sink it into user blocks to reduce the number of
/// virtual registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  MVT DstVT = TLI.getValueType(CI->getType());

  // This is an fp<->int conversion?
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to.  This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
    SrcVT = TLI.getTypeToTransformTo(SrcVT);
  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
    DstVT = TLI.getTypeToTransformTo(DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
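  // (For example, on a 32-bit target where i16 is promoted to i32, a
  // "trunc i32 %x to i16" ends up with SrcVT == DstVT == i32 after the
  // adjustment above, so it is treated as a noop copy and sunk below.)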
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in.  For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      unsigned OpVal = UI.getOperandNo()/2;
      UserBB = PN->getIncomingBlock(OpVal);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced.  This
/// is a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmp - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCmp =
        CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

/// EraseDeadInstructions - Erase any dead instructions, recursively.
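///
/// (This is used by OptimizeMemoryInst: once a sunk address computation has
/// replaced the last use of the original address value, the chain of
/// casts/GEPs/adds that produced it is typically trivially dead and can be
/// cleaned up here instead of waiting for a later DCE run.)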
static void EraseDeadInstructions(Value *V) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || !I->use_empty()) return;

  SmallPtrSet<Instruction*, 16> Insts;
  Insts.insert(I);

  while (!Insts.empty()) {
    I = *Insts.begin();
    Insts.erase(I);
    if (isInstructionTriviallyDead(I)) {
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
        if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i)))
          Insts.insert(U);
      I->eraseFromParent();
    }
  }
}

//===----------------------------------------------------------------------===//
// Addressing Mode Analysis and Optimization
//===----------------------------------------------------------------------===//

namespace {
  /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
  /// which holds actual Value*'s for register values.
  struct ExtAddrMode : public TargetLowering::AddrMode {
    Value *BaseReg;
    Value *ScaledReg;
    ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
    void print(OStream &OS) const;
    void dump() const {
      print(cerr);
      cerr << '\n';
    }
  };
} // end anonymous namespace

static inline OStream &operator<<(OStream &OS, const ExtAddrMode &AM) {
  AM.print(OS);
  return OS;
}

void ExtAddrMode::print(OStream &OS) const {
  bool NeedPlus = false;
  OS << "[";
  if (BaseGV)
    OS << (NeedPlus ? " + " : "")
       << "GV:%" << BaseGV->getName(), NeedPlus = true;

  if (BaseOffs)
    OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;

  if (BaseReg)
    OS << (NeedPlus ? " + " : "")
       << "Base:%" << BaseReg->getName(), NeedPlus = true;
  if (Scale)
    OS << (NeedPlus ? " + " : "")
       << Scale << "*%" << ScaledReg->getName(), NeedPlus = true;

  OS << ']';
}

namespace {
/// AddressingModeMatcher - This class exposes a single public method, which is
/// used to construct a "maximal munch" of the addressing mode for the target
/// specified by TLI for an access to "V" with an access type of AccessTy.  This
/// returns the addressing mode that is actually matched by value, but also
/// returns the list of instructions involved in that addressing computation in
/// AddrModeInsts.
class AddressingModeMatcher {
  SmallVectorImpl<Instruction*> &AddrModeInsts;
  const TargetLowering &TLI;

  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
  /// the memory instruction that we're computing this address for.
  const Type *AccessTy;
  Instruction *MemoryInst;

  /// AddrMode - This is the addressing mode that we're building up.  This is
  /// part of the return value of this addressing mode matching stuff.
  ExtAddrMode &AddrMode;

  /// IgnoreProfitability - This is set to true when we should not do
  /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
  /// always returns true.
  bool IgnoreProfitability;

  AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
                        const TargetLowering &T, const Type *AT,
                        Instruction *MI, ExtAddrMode &AM)
    : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
    IgnoreProfitability = false;
  }
public:

  /// Match - Find the maximal addressing mode that a load/store of V can fold,
  /// given an access type of AccessTy.  This returns a list of involved
  /// instructions in AddrModeInsts.
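  ///
  /// As an illustrative sketch (hypothetical IR and target, not from the
  /// source), matching the address of "load i32* %p" where
  ///     %idx = shl i64 %i, 2
  ///     %sum = add i64 %base, %idx
  ///     %p   = inttoptr i64 %sum to i32*
  /// could yield an ExtAddrMode that prints as [Base:%base + 4*%i], with the
  /// shl, add and inttoptr reported in AddrModeInsts, provided the target's
  /// isLegalAddressingMode accepts [r + 4*r] for an i32 access.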
  static ExtAddrMode Match(Value *V, const Type *AccessTy,
                           Instruction *MemoryInst,
                           SmallVectorImpl<Instruction*> &AddrModeInsts,
                           const TargetLowering &TLI) {
    ExtAddrMode Result;

    bool Success =
      AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
                            MemoryInst, Result).MatchAddr(V, 0);
    Success = Success; assert(Success && "Couldn't select *anything*?");
    return Result;
  }
private:
  bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
  bool MatchAddr(Value *V, unsigned Depth);
  bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
  bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
                                            ExtAddrMode &AMBefore,
                                            ExtAddrMode &AMAfter);
  bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
};
} // end anonymous namespace

/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
                                             unsigned Depth) {
  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
  // mode.  Just process that directly.
  if (Scale == 1)
    return MatchAddr(ScaleReg, Depth);

  // If the scale is 0, it takes nothing to add this.
  if (Scale == 0)
    return true;

  // If we already have a scale of this value, we can add to it, otherwise, we
  // need an available scale field.
  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
    return false;

  ExtAddrMode TestAddrMode = AddrMode;

  // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
  // [A+B + A*7] -> [B+A*8].
  TestAddrMode.Scale += Scale;
  TestAddrMode.ScaledReg = ScaleReg;

  // If the new address isn't legal, bail out.
  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
    return false;

  // It was legal, so commit it.
  AddrMode = TestAddrMode;

  // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
  // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
  // X*Scale + C*Scale to addr mode.
  ConstantInt *CI; Value *AddLHS;
  if (match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
    TestAddrMode.ScaledReg = AddLHS;
    TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;

    // If this addressing mode is legal, commit it and remember that we folded
    // this instruction.
    if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
      AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
      AddrMode = TestAddrMode;
      return true;
    }
  }

  // Otherwise, not (x+c)*scale, just return what we have.
  return true;
}

/// MightBeFoldableInst - This is a little filter, which returns true if an
/// addressing computation involving I might be folded into a load/store
/// accessing it.  This doesn't need to be perfect, but needs to accept at least
/// the set of instructions that MatchOperationAddr can.
static bool MightBeFoldableInst(Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::BitCast:
    // Don't touch identity bitcasts.
    if (I->getType() == I->getOperand(0)->getType())
      return false;
    return isa<PointerType>(I->getType()) || isa<IntegerType>(I->getType());
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return true;
  case Instruction::IntToPtr:
    // We know the input is intptr_t, so this is foldable.
    return true;
  case Instruction::Add:
    return true;
  case Instruction::Mul:
  case Instruction::Shl:
    // Can only handle X*C and X << C.
    return isa<ConstantInt>(I->getOperand(1));
  case Instruction::GetElementPtr:
    return true;
  default:
    return false;
  }
}


/// MatchOperationAddr - Given an instruction or constant expr, see if we can
/// fold the operation into the addressing mode.  If so, update the addressing
/// mode and return true, otherwise return false without modifying AddrMode.
bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
                                               unsigned Depth) {
  // Avoid exponential behavior on extremely deep expression trees.
  if (Depth >= 5) return false;

  switch (Opcode) {
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return MatchAddr(AddrInst->getOperand(0), Depth);
  case Instruction::IntToPtr:
    // This inttoptr is a no-op if the integer type is pointer sized.
    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
        TLI.getPointerTy())
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::BitCast:
    // BitCast is always a noop, and we can handle it as long as it is
    // int->int or pointer->pointer (we don't want int<->fp or something).
    if ((isa<PointerType>(AddrInst->getOperand(0)->getType()) ||
         isa<IntegerType>(AddrInst->getOperand(0)->getType())) &&
        // Don't touch identity bitcasts.  These were probably put here by LSR,
        // and we don't want to mess around with them.  Assume it knows what it
        // is doing.
        AddrInst->getOperand(0)->getType() != AddrInst->getType())
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::Add: {
    // Check to see if we can merge in the RHS then the LHS.  If so, we win.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();
    if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
        MatchAddr(AddrInst->getOperand(0), Depth+1))
      return true;

    // Restore the old addr mode info.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);

    // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
        MatchAddr(AddrInst->getOperand(1), Depth+1))
      return true;

    // Otherwise we definitely can't merge the ADD in.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);
    break;
  }
  //case Instruction::Or:
  //  TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
  //break;
  case Instruction::Mul:
  case Instruction::Shl: {
    // Can only handle X*C and X << C.
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS) return false;
    int64_t Scale = RHS->getSExtValue();
    if (Opcode == Instruction::Shl)
      Scale = 1 << Scale;

    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
  }
  case Instruction::GetElementPtr: {
    // Scan the GEP.  We check whether it contains constant offsets and at most
    // one variable offset.
    int VariableOperand = -1;
    unsigned VariableScale = 0;

    int64_t ConstantOffset = 0;
    const TargetData *TD = TLI.getTargetData();
    gep_type_iterator GTI = gep_type_begin(AddrInst);
    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD->getStructLayout(STy);
        unsigned Idx =
          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
        ConstantOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t TypeSize = TD->getABITypeSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
          ConstantOffset += CI->getSExtValue()*TypeSize;
        } else if (TypeSize) {  // Scales of zero don't do anything.
          // We only allow one variable index at the moment.
          if (VariableOperand != -1)
            return false;

          // Remember the variable index.
          VariableOperand = i;
          VariableScale = TypeSize;
        }
      }
    }

    // A common case is for the GEP to only do a constant offset.  In this case,
    // just add it to the disp field and check validity.
    if (VariableOperand == -1) {
      AddrMode.BaseOffs += ConstantOffset;
      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
        // Check to see if we can fold the base pointer in too.
        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
          return true;
      }
      AddrMode.BaseOffs -= ConstantOffset;
      return false;
    }

    // Save the valid addressing mode in case we can't match.
    ExtAddrMode BackupAddrMode = AddrMode;

    // Check that this has no base reg yet.  If so, we won't have a place to
    // put the base of the GEP (assuming it is not a null ptr).
    bool SetBaseReg = true;
    if (isa<ConstantPointerNull>(AddrInst->getOperand(0)))
      SetBaseReg = false;   // null pointer base doesn't need representation.
    else if (AddrMode.HasBaseReg)
      return false;  // Base register already specified, can't match GEP.
    else {
      // Otherwise, we'll use the GEP base as the BaseReg.
      AddrMode.HasBaseReg = true;
      AddrMode.BaseReg = AddrInst->getOperand(0);
    }

    // See if the scale and offset amount is valid for this target.
    AddrMode.BaseOffs += ConstantOffset;

    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
                          Depth)) {
      AddrMode = BackupAddrMode;
      return false;
    }

    // If we have a null as the base of the GEP, folding in the constant offset
    // plus variable scale is all we can do.
    if (!SetBaseReg) return true;

    // If this match succeeded, we know that we can form an address with the
    // GepBase as the basereg.  Match the base pointer of the GEP more
    // aggressively by zeroing out BaseReg and rematching.  If the base is
    // (for example) another GEP, this allows merging in that other GEP into
    // the addressing mode we're forming.
    AddrMode.HasBaseReg = false;
    AddrMode.BaseReg = 0;
    bool Success = MatchAddr(AddrInst->getOperand(0), Depth+1);
    assert(Success && "MatchAddr should be able to fill in BaseReg!");
    Success=Success;
    return true;
  }
  }
  return false;
}

/// MatchAddr - If we can, try to add the value of 'Addr' into the current
/// addressing mode.  If Addr can't be added to AddrMode this returns false and
/// leaves AddrMode unmodified.
/// This assumes that Addr is either a pointer type or intptr_t for the target.
///
bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
    // Fold in immediates if legal for the target.
    AddrMode.BaseOffs += CI->getSExtValue();
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.BaseOffs -= CI->getSExtValue();
  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
    // If this is a global variable, try to fold it into the addressing mode.
    if (AddrMode.BaseGV == 0) {
      AddrMode.BaseGV = GV;
      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
        return true;
      AddrMode.BaseGV = 0;
    }
  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();

    // Check to see if it is possible to fold this operation.
    if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
      // Okay, it's possible to fold this.  Check to see if it is actually
      // *profitable* to do so.  We use a simple cost model to avoid increasing
      // register pressure too much.
      if (I->hasOneUse() ||
          IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
        AddrModeInsts.push_back(I);
        return true;
      }

      // It isn't profitable to do this, roll back.
      //cerr << "NOT FOLDING: " << *I;
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
    }
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
    if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
      return true;
  } else if (isa<ConstantPointerNull>(Addr)) {
    // Null pointer gets folded without affecting the addressing mode.
    return true;
  }

  // Worst case, the target should support [reg] addressing modes. :)
  if (!AddrMode.HasBaseReg) {
    AddrMode.HasBaseReg = true;
    AddrMode.BaseReg = Addr;
    // Still check for legality in case the target supports [imm] but not [i+r].
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.HasBaseReg = false;
    AddrMode.BaseReg = 0;
  }

  // If the base register is already taken, see if we can do [r+r].
  if (AddrMode.Scale == 0) {
    AddrMode.Scale = 1;
    AddrMode.ScaledReg = Addr;
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.Scale = 0;
    AddrMode.ScaledReg = 0;
  }
  // Couldn't match.
  return false;
}

/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
/// memory use.  If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(Instruction *I,
                 SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
                              SmallPtrSet<Instruction*, 16> &ConsideredInsts) {
  // If we already considered this instruction, we're done.
  if (!ConsideredInsts.insert(I))
    return false;

  // If this is an obviously unfoldable instruction, bail out.
  if (!MightBeFoldableInst(I))
    return true;

  // Loop over all the uses, recursively processing them.
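  // (Sketch: for "%a = add i64 %x, 4" whose only uses are
  // "%p = inttoptr i64 %a to i32*" and loads of %p, the recursion bottoms out
  // at the loads and they end up in MemoryUses; a use by, say, a call to an
  // unknown function would make us return true instead.)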
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
      MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
      if (UI.getOperandNo() == 0) return true;  // Storing addr, not into addr.
      MemoryUses.push_back(std::make_pair(SI, UI.getOperandNo()));
      continue;
    }

    if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
      if (IA == 0) return true;


      // FIXME: HANDLE MEM OPS
      //MemoryUses.push_back(std::make_pair(CI, UI.getOperandNo()));
      return true;
    }

    if (FindAllMemoryUses(cast<Instruction>(*UI), MemoryUses, ConsideredInsts))
      return true;
  }

  return false;
}


/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
/// the use site that we're folding it into.  If so, there is no cost to
/// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
/// that we know are live at the instruction already.
bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
                                                   Value *KnownLive2) {
  // If Val is either of the known-live values, we know it is live!
  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
    return true;

  // All values other than instructions and arguments (e.g. constants) are live.
  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;

  // If Val is a constant sized alloca in the entry block, it is live; this is
  // true because it is just a reference to the stack/frame pointer, which is
  // live for the whole function.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
    if (AI->isStaticAlloca())
      return true;

  // Check to see if this value is already used in the memory instruction's
  // block.  If so, it's already live into the block at the very least, so we
  // can reasonably fold it.
  BasicBlock *MemBB = MemoryInst->getParent();
  for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
       UI != E; ++UI)
    // We know that uses of arguments and instructions have to be instructions.
    if (cast<Instruction>(*UI)->getParent() == MemBB)
      return true;

  return false;
}



#include "llvm/Support/CommandLine.h"
cl::opt<bool> ENABLECRAZYHACK("enable-smarter-addr-folding", cl::Hidden);


/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
/// mode of the machine to fold the specified instruction into a load or store
/// that ultimately uses it.  However, the specified instruction has multiple
/// uses.  Given this, it may actually increase register pressure to fold it
/// into the load.  For example, consider this code:
///
///     X = ...
///     Y = X+1
///     use(Y)   -> nonload/store
///     Z = Y+1
///     load Z
///
/// In this case, Y has multiple uses, and can be folded into the load of Z
/// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
/// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
/// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
/// number of computations either.
///
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic.
/// If X was live across 'load Z' for other reasons, we actually *would* want
/// to fold the addressing mode in the Z case.  This would make Y die earlier.
bool AddressingModeMatcher::
IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
                                     ExtAddrMode &AMAfter) {
  if (IgnoreProfitability || !ENABLECRAZYHACK) return true;

  // AMBefore is the addressing mode before this instruction was folded into it,
  // and AMAfter is the addressing mode after the instruction was folded.  Get
  // the set of registers referenced by AMAfter and subtract out those
  // referenced by AMBefore: this is the set of values which folding in this
  // address extends the lifetime of.
  //
  // Note that there are only two potential values being referenced here,
  // BaseReg and ScaleReg (global addresses are always available, as are any
  // folded immediates).
  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;

  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
  // lifetime wasn't extended by adding this instruction.
  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    BaseReg = 0;
  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    ScaledReg = 0;

  // If folding this instruction (and its subexprs) didn't extend any live
  // ranges, we're ok with it.
  if (BaseReg == 0 && ScaledReg == 0)
    return true;

  // If all uses of this instruction are ultimately load/store/inlineasm's,
  // check to see if their addressing modes will include this instruction.  If
  // so, we can fold it into all uses, so it doesn't matter if it has multiple
  // uses.
  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
  SmallPtrSet<Instruction*, 16> ConsideredInsts;
  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts))
    return false;  // Has a non-memory, non-foldable use!

  // Now that we know that all uses of this instruction are part of a chain of
  // computation involving only operations that could theoretically be folded
  // into a memory use, loop over each of these uses and see if they could
  // *actually* fold the instruction.
  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
    Instruction *User = MemoryUses[i].first;
    unsigned OpNo = MemoryUses[i].second;

    // Get the access type of this use.  If the use isn't a pointer, we don't
    // know what it accesses.
    Value *Address = User->getOperand(OpNo);
    if (!isa<PointerType>(Address->getType()))
      return false;
    const Type *AddressAccessTy =
      cast<PointerType>(Address->getType())->getElementType();

    // Do a match against the root of this address, ignoring profitability. This
    // will tell us if the addressing mode for the memory operation will
    // *actually* cover the shared instruction.
    ExtAddrMode Result;
    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
                                  MemoryInst, Result);
    Matcher.IgnoreProfitability = true;
    bool Success = Matcher.MatchAddr(Address, 0);
    Success = Success; assert(Success && "Couldn't select *anything*?");

    // If the match didn't cover I, then it won't be shared by it.
    if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
                  I) == MatchedAddrModeInsts.end())
      return false;

    MatchedAddrModeInsts.clear();
  }

  return true;
}


//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

/// IsNonLocalValue - Return true if the specified values are defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation.  As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program.  The problem is that isel can only
/// see within a single block.  As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        const Type *AccessTy,
                                        DenseMap<Value*,Value*> &SunkAddrs) {
  // Figure out what addressing mode will be built up for this operation.
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
                                                      AddrModeInsts, *TLI);

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(cerr << "CGP: Found local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user.  Since our caller is
  // scanning from the top of the BB to the bottom, reuse of the expr is
  // guaranteed to happen later.
  BasicBlock::iterator InsertPt = MemoryInst;

  // Now that we have determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block.  If so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << "\n");
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
  } else {
    DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << "\n");
    const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType();

    Value *Result = 0;
    // Start with the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
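        // (The sunk address is rebuilt below, roughly as
        //    sunkaddr = inttoptr(Scale*ScaledReg + BaseReg + BaseGV + BaseOffs)
        //  with each piece first converted to the target's intptr type.)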
      } else if (isa<PointerType>(V->getType())) {
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else {
        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
      }
      if (AddrMode.Scale != 1)
        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
                                                          AddrMode.Scale),
                                      "sunkaddr", InsertPt);
      Result = V;
    }

    // Add in the base register.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType() != IntPtrTy)
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
                                  InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
  }

  MemoryInst->replaceUsesOfWith(Addr, SunkAddr);

  if (Addr->use_empty())
    EraseDeadInstructions(Addr);
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  bool MadeChange = false;
  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  std::vector<InlineAsm::ConstraintInfo>
    ConstraintInfos = IA->ParseConstraints();

  /// ConstraintOperands - Information about all of the constraints.
  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
    ConstraintOperands.
      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      if (OpInfo.isIndirect)
        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue(),
                             OpInfo.ConstraintType == TargetLowering::C_Memory);

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = OpInfo.CallOperandVal;
      MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
    }
  }

  return MadeChange;
}

bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If both result of the {s|z}xt and its source are live out, rewrite all
  // other uses of the source with result of extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;

    MadeChange = true;
  }

  return MadeChange;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Split all critical edges where the dest block has a PHI and where the phi
  // has shared immediate operands.
  TerminatorInst *BBTI = BB.getTerminator();
  if (BBTI->getNumSuccessors() > 1) {
    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i)
      if (isa<PHINode>(BBTI->getSuccessor(i)->begin()) &&
          isCriticalEdge(BBTI, i, true))
        SplitEdgeNicely(BBTI, i, this);
  }


  // Keep track of non-local addresses that have been sunk into this block.
  // This allows us to avoid inserting duplicate code for blocks with multiple
  // load/stores of the same address.
  DenseMap<Value*, Value*> SunkAddrs;

  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
    Instruction *I = BBI++;

    if (CastInst *CI = dyn_cast<CastInst>(I)) {
      // If the source of the cast is a constant, then this should have
      // already been constant folded.  The only reason NOT to constant fold
      // it is if something (e.g. LSR) was careful to place the constant
      // evaluation in a block other than the one that uses it (e.g. to hoist
      // the address of globals out of a loop).  If this is the case, we don't
      // want to forward-subst the cast.
      if (isa<Constant>(CI->getOperand(0)))
        continue;

      bool Change = false;
      if (TLI) {
        Change = OptimizeNoopCopyExpression(CI, *TLI);
        MadeChange |= Change;
      }

      if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I)))
        MadeChange |= OptimizeExtUses(I);
    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
      MadeChange |= OptimizeCmpExpression(CI);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
                                         SunkAddrs);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
                                         SI->getOperand(0)->getType(),
                                         SunkAddrs);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      if (GEPI->hasAllZeroIndices()) {
        /// The GEP operand must be a pointer, so must its result -> BitCast
        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                          GEPI->getName(), GEPI);
        GEPI->replaceAllUsesWith(NC);
        GEPI->eraseFromParent();
        MadeChange = true;
        BBI = NC;
      }
    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
      // If we found an inline asm expression, and if the target knows how to
      // lower it to normal LLVM code, do so now.
      if (TLI && isa<InlineAsm>(CI->getCalledValue()))
        if (const TargetAsmInfo *TAI =
              TLI->getTargetMachine().getTargetAsmInfo()) {
          if (TAI->ExpandInlineAsm(CI))
            BBI = BB.begin();
          else
            // Sink address computing for memory operands into the block.
            MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
        }
    }
  }

  return MadeChange;
}