CodeGenPrepare.cpp revision bb3204a440e3cf5f9bd44f05d34f99b7450341e6
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

namespace {
  class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer of a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(&ID), TLI(tli) {}
    bool runOnFunction(Function &F);

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeLoadStoreInst(Instruction *I, Value *Addr,
                               const Type *AccessTy,
                               DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                               DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeExtUses(Instruction *I);
  };
}

char CodeGenPrepare::ID = 0;
static RegisterPass<CodeGenPrepare> X("codegenprepare",
                                      "Optimize for code generation");

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}


bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= OptimizeBlock(*BB);
    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes
/// and an unconditional branch.  Passes before isel (e.g. LSR/loopsimplify)
/// often split edges in ways that are non-optimal for isel.  Start by
/// eliminating these blocks so we can split them the way we want them.
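///
/// A typical candidate (hypothetical IR, for illustration) looks like:
///   bb:                              ; preds = %entry, %loop
///     %p = phi i32 [ 0, %entry ], [ %x, %loop ]
///     br label %merge
/// where %p is only used by phi nodes in %merge; such a block can be folded
/// into %merge by rerouting its predecessors and merging the phi operands.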
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch isn't a phi node, then other stuff
    // is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      if (!isa<PHINode>(BBI)) continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block.  If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
/// an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB;

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (DestBB->getSinglePredecessor()) {
    // If DestBB has single-entry PHI nodes, fold them.
    while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
      Value *NewVal = PN->getIncomingValue(0);
      // Replace self referencing PHI with undef, it must be dead.
      if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
      PN->replaceAllUsesWith(NewVal);
      PN->eraseFromParent();
    }

    // Splice all the PHI nodes from BB over to DestBB.
    DestBB->getInstList().splice(DestBB->begin(), BB->getInstList(),
                                 BB->begin(), BI);

    // Anything that branched to BB now branches to DestBB.
    BB->replaceAllUsesWith(DestBB);

    // Nuke BB.
    BB->eraseFromParent();

    DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
    return;
  }

  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  BB->eraseFromParent();

  DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
}


/// SplitEdgeNicely - Split the critical edge from TI to its specified
/// successor if it will improve codegen.  We only do this if the successor has
/// phi nodes (otherwise critical edges are ok).  If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
/// instead of introducing a new block.
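///
/// For illustration (block names hypothetical): if %bb branches conditionally
/// to %dest, and %dest already has an empty predecessor %pre that ends in an
/// unconditional branch and feeds the same values into %dest's phis as %bb
/// would, we retarget the edge to %pre instead of creating a new split block.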
static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) {
  BasicBlock *TIBB = TI->getParent();
  BasicBlock *Dest = TI->getSuccessor(SuccNum);
  assert(isa<PHINode>(Dest->begin()) &&
         "This should only be called if Dest has a PHI!");

  // As a hack, never split backedges of loops.  Even though the copy for any
  // PHIs inserted on the backedge would be dead for exits from the loop, we
  // assume that the cost of *splitting* the backedge would be too high.
  if (Dest == TIBB)
    return;

  /// TIPHIValues - This array is lazily computed to determine the values of
  /// PHIs in Dest that TI would provide.
  SmallVector<Value*, 32> TIPHIValues;

  // Check to see if Dest has any blocks that can be used as a split edge for
  // this terminator.
  for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    // To be usable, the pred has to end with an uncond branch to the dest.
    BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
    if (!PredBr || !PredBr->isUnconditional() ||
        // Must be empty other than the branch.
        &Pred->front() != PredBr ||
        // Cannot be the entry block; its label does not get emitted.
        Pred == &(Dest->getParent()->getEntryBlock()))
      continue;

    // Finally, since we know that Dest has phi nodes in it, we have to make
    // sure that jumping to Pred will have the same effect as going to Dest in
    // terms of PHI values.
    PHINode *PN;
    unsigned PHINo = 0;
    bool FoundMatch = true;
    for (BasicBlock::iterator I = Dest->begin();
         (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
      if (PHINo == TIPHIValues.size())
        TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

      // If the PHI entry doesn't work, we can't use this pred.
      if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
        FoundMatch = false;
        break;
      }
    }

    // If we found a workable predecessor, change TI to branch to Succ.
    if (FoundMatch) {
      Dest->removePredecessor(TIBB);
      TI->setSuccessor(SuccNum, Pred);
      return;
    }
  }

  SplitCriticalEdge(TI, SuccNum, P, true);
}

/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, int->uint, or
/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  MVT DstVT = TLI.getValueType(CI->getType());

  // This is an fp<->int conversion?
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to.  This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
    SrcVT = TLI.getTypeToTransformTo(SrcVT);
  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
    DstVT = TLI.getTypeToTransformTo(DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in.  For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      unsigned OpVal = UI.getOperandNo()/2;
      UserBB = PN->getIncomingBlock(OpVal);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced.  This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmp - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCmp =
        CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

/// EraseDeadInstructions - Erase any dead instructions, recursively.
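/// This is used after OptimizeLoadStoreInst rewrites an address: once the
/// original address value has no uses left, the chain of instructions that
/// computed it (e.g. the mul/add feeding a sunk addressing mode) is deleted
/// with a simple worklist walk over the operands.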
static void EraseDeadInstructions(Value *V) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || !I->use_empty()) return;

  SmallPtrSet<Instruction*, 16> Insts;
  Insts.insert(I);

  while (!Insts.empty()) {
    I = *Insts.begin();
    Insts.erase(I);
    if (isInstructionTriviallyDead(I)) {
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
        if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i)))
          Insts.insert(U);
      I->eraseFromParent();
    }
  }
}

namespace {
  /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
  /// which holds actual Value*'s for register values.
  struct ExtAddrMode : public TargetLowering::AddrMode {
    Value *BaseReg;
    Value *ScaledReg;
    ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
    void print(OStream &OS) const;
    void dump() const {
      print(cerr);
      cerr << '\n';
    }
  };
} // end anonymous namespace

static OStream &operator<<(OStream &OS, const ExtAddrMode &AM) {
  AM.print(OS);
  return OS;
}


void ExtAddrMode::print(OStream &OS) const {
  bool NeedPlus = false;
  OS << "[";
  if (BaseGV)
    OS << (NeedPlus ? " + " : "")
       << "GV:%" << BaseGV->getName(), NeedPlus = true;

  if (BaseOffs)
    OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;

  if (BaseReg)
    OS << (NeedPlus ? " + " : "")
       << "Base:%" << BaseReg->getName(), NeedPlus = true;
  if (Scale)
    OS << (NeedPlus ? " + " : "")
       << Scale << "*%" << ScaledReg->getName(), NeedPlus = true;

  OS << ']';
}

/// TryMatchingScaledValue - Try adding ScaleReg*Scale to the specified
/// addressing mode.  Return true if this addr mode is legal for the target,
/// false if not.
static bool TryMatchingScaledValue(Value *ScaleReg, int64_t Scale,
                                   const Type *AccessTy, ExtAddrMode &AddrMode,
                                   SmallVectorImpl<Instruction*> &AddrModeInsts,
                                   const TargetLowering &TLI, unsigned Depth) {
  // If we already have a scale of this value, we can add to it, otherwise, we
  // need an available scale field.
  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
    return false;

  ExtAddrMode TestAddrMode = AddrMode;

  // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
  // [A+B + A*7] -> [B+A*8].
  TestAddrMode.Scale += Scale;
  TestAddrMode.ScaledReg = ScaleReg;

  // If the new address isn't legal, bail out.
  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
    return false;

  // It was legal, so commit it.
  AddrMode = TestAddrMode;

  // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
  // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
  // X*Scale + C*Scale to addr mode.
  ConstantInt *CI; Value *AddLHS;
  if (match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
    TestAddrMode.ScaledReg = AddLHS;
    TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;

    // If this addressing mode is legal, commit it and remember that we folded
    // this instruction.
    if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
      AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
      AddrMode = TestAddrMode;
    }
  }

  // Otherwise, not (x+c)*scale, just return what we have.
  return true;
}

static bool FindMaximalLegalAddressingMode(Value *Addr, const Type *AccessTy,
                                           ExtAddrMode &AddrMode,
                                           SmallVectorImpl<Instruction*> &AMI,
                                           const TargetLowering &TLI,
                                           unsigned Depth);

/// FindMaximalLegalAddressingModeForOperation - Given an instruction or
/// constant expr, see if we can fold the operation into the addressing mode.
/// If so, update the addressing mode and return true, otherwise return false.
static bool
FindMaximalLegalAddressingModeForOperation(User *AddrInst, unsigned Opcode,
                                           const Type *AccessTy,
                                           ExtAddrMode &AddrMode,
                                           SmallVectorImpl<Instruction*> &AddrModeInsts,
                                           const TargetLowering &TLI,
                                           unsigned Depth) {
  switch (Opcode) {
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                       AddrMode, AddrModeInsts, TLI, Depth))
      return true;
    break;
  case Instruction::IntToPtr:
    // This inttoptr is a no-op if the integer type is pointer sized.
    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
        TLI.getPointerTy()) {
      if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                         AddrMode, AddrModeInsts, TLI, Depth))
        return true;
    }
    break;
  case Instruction::Add: {
    // Check to see if we can merge in the RHS then the LHS.  If so, we win.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();
    if (FindMaximalLegalAddressingMode(AddrInst->getOperand(1), AccessTy,
                                       AddrMode, AddrModeInsts, TLI, Depth+1) &&
        FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                       AddrMode, AddrModeInsts, TLI, Depth+1))
      return true;

    // Restore the old addr mode info.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);

    // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
    if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                       AddrMode, AddrModeInsts, TLI, Depth+1) &&
        FindMaximalLegalAddressingMode(AddrInst->getOperand(1), AccessTy,
                                       AddrMode, AddrModeInsts, TLI, Depth+1))
      return true;

    // Otherwise we definitely can't merge the ADD in.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);
    break;
  }
  case Instruction::Or: {
    //ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    //if (!RHS) break;
    // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
    break;
  }
  case Instruction::Mul:
  case Instruction::Shl: {
    // Can only handle X*C and X << C.
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS) break;
    int64_t Scale = RHS->getSExtValue();
    if (Opcode == Instruction::Shl)
      Scale = 1 << Scale;

    if (TryMatchingScaledValue(AddrInst->getOperand(0), Scale, AccessTy,
                               AddrMode, AddrModeInsts, TLI, Depth))
      return true;
    break;
  }
  case Instruction::GetElementPtr: {
    // Scan the GEP to see if it contains constant offsets and at most
    // one variable offset.
    int VariableOperand = -1;
    unsigned VariableScale = 0;

    int64_t ConstantOffset = 0;
    const TargetData *TD = TLI.getTargetData();
    gep_type_iterator GTI = gep_type_begin(AddrInst);
    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD->getStructLayout(STy);
        unsigned Idx =
          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
        ConstantOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t TypeSize = TD->getABITypeSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
          ConstantOffset += CI->getSExtValue()*TypeSize;
        } else if (TypeSize) {  // Scales of zero don't do anything.
          // We only allow one variable index at the moment.
          if (VariableOperand != -1) {
            VariableOperand = -2;
            break;
          }

          // Remember the variable index.
          VariableOperand = i;
          VariableScale = TypeSize;
        }
      }
    }

    // If the GEP had multiple variable indices, punt.
    if (VariableOperand == -2)
      break;

    // A common case is for the GEP to only do a constant offset.  In this case,
    // just add it to the disp field and check validity.
    if (VariableOperand == -1) {
      AddrMode.BaseOffs += ConstantOffset;
      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
        // Check to see if we can fold the base pointer in too.
        if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                           AddrMode, AddrModeInsts, TLI,
                                           Depth+1))
          return true;
      }
      AddrMode.BaseOffs -= ConstantOffset;
    } else {
      // Check that this has no base reg yet.  If it does, we won't have a place
      // to put the base of the GEP (assuming it is not a null ptr).
      bool SetBaseReg = false;
      if (AddrMode.HasBaseReg) {
        if (!isa<ConstantPointerNull>(AddrInst->getOperand(0)))
          break;
      } else {
        AddrMode.HasBaseReg = true;
        AddrMode.BaseReg = AddrInst->getOperand(0);
        SetBaseReg = true;
      }

      // See if the scale amount is valid for this target.
      AddrMode.BaseOffs += ConstantOffset;
      if (TryMatchingScaledValue(AddrInst->getOperand(VariableOperand),
                                 VariableScale, AccessTy, AddrMode,
                                 AddrModeInsts, TLI, Depth)) {
        if (!SetBaseReg) return true;

        // If this match succeeded, we know that we can form an address with the
        // GepBase as the basereg.  See if we can match *more*.
        AddrMode.HasBaseReg = false;
        AddrMode.BaseReg = 0;
        if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                           AddrMode, AddrModeInsts, TLI,
                                           Depth+1))
          return true;
        // Strange, shouldn't happen.  Restore the base reg and succeed the easy
        // way.
        AddrMode.HasBaseReg = true;
        AddrMode.BaseReg = AddrInst->getOperand(0);
        return true;
      }

      AddrMode.BaseOffs -= ConstantOffset;
      if (SetBaseReg) {
        AddrMode.HasBaseReg = false;
        AddrMode.BaseReg = 0;
      }
    }
    break;
  }
  }
  return false;
}

/// FindMaximalLegalAddressingMode - If we can, try to merge the computation of
/// Addr into the specified addressing mode.  If Addr can't be added to AddrMode
/// this returns false.  This assumes that Addr is either a pointer type or
/// intptr_t for the target.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
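///
/// For example, given a chain like (hypothetical IR, for illustration):
///   %s = mul i32 %i, 4
///   %a = add i32 %base, %s
///   %p = inttoptr i32 %a to i32*
///   %v = load i32* %p
/// on a target whose isLegalAddressingMode accepts [reg + reg*scale], the
/// whole chain folds into the single mode [Base:%base + 4*%i].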
static bool FindMaximalLegalAddressingMode(Value *Addr, const Type *AccessTy,
                                           ExtAddrMode &AddrMode,
                                           SmallVectorImpl<Instruction*> &AddrModeInsts,
                                           const TargetLowering &TLI,
                                           unsigned Depth) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
    // Fold in immediates if legal for the target.
    AddrMode.BaseOffs += CI->getSExtValue();
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.BaseOffs -= CI->getSExtValue();
  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
    // If this is a global variable, fold it into the addressing mode if possible.
    if (AddrMode.BaseGV == 0) {
      AddrMode.BaseGV = GV;
      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
        return true;
      AddrMode.BaseGV = 0;
    }
  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
    if (Depth < 5 &&  // Limit recursion to avoid exponential behavior.
        FindMaximalLegalAddressingModeForOperation(I, I->getOpcode(),
                                                   AccessTy, AddrMode,
                                                   AddrModeInsts, TLI, Depth)) {
      AddrModeInsts.push_back(I);
      return true;
    }
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
    if (Depth < 5 &&  // Limit recursion to avoid exponential behavior.
        FindMaximalLegalAddressingModeForOperation(CE, CE->getOpcode(),
                                                   AccessTy, AddrMode,
                                                   AddrModeInsts, TLI, Depth))
      return true;
  } else if (isa<ConstantPointerNull>(Addr)) {
    return true;
  }


  // Worst case, the target should support [reg] addressing modes. :)
  if (!AddrMode.HasBaseReg) {
    AddrMode.HasBaseReg = true;
    // Still check for legality in case the target supports [imm] but not [i+r].
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) {
      AddrMode.BaseReg = Addr;
      return true;
    }
    AddrMode.HasBaseReg = false;
  }

  // If the base register is already taken, see if we can do [r+r].
  if (AddrMode.Scale == 0) {
    AddrMode.Scale = 1;
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) {
      AddrMode.ScaledReg = Addr;
      return true;
    }
    AddrMode.Scale = 0;
  }
  // Couldn't match.
  return false;
}


/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeLoadStoreInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation.  As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program.  The problem is that isel can only
/// see within a single block.  As such, we sink as much legal addressing mode
/// stuff into the block as possible.
bool CodeGenPrepare::OptimizeLoadStoreInst(Instruction *LdStInst, Value *Addr,
                                           const Type *AccessTy,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  // Figure out what addressing mode will be built up for this operation.
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode;
  bool Success = FindMaximalLegalAddressingMode(Addr, AccessTy, AddrMode,
                                                AddrModeInsts, *TLI, 0);
  Success = Success; assert(Success && "Couldn't select *anything*?");

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], LdStInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(cerr << "CGP: Found local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user.  Since our caller is
  // scanning from the top of the BB to the bottom, reuses of the expr are
  // guaranteed to happen later.
  BasicBlock::iterator InsertPt = LdStInst;

  // Now that we have determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block.  If so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << "\n");
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
  } else {
    DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << "\n");
    const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType();

    Value *Result = 0;
    // Start with the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (isa<PointerType>(V->getType())) {
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else {
        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
      }
      if (AddrMode.Scale != 1)
        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
                                                          AddrMode.Scale),
                                      "sunkaddr", InsertPt);
      Result = V;
    }

    // Add in the base register.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType() != IntPtrTy)
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
                                  InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr", InsertPt);
  }

  LdStInst->replaceUsesOfWith(Addr, SunkAddr);

  if (Addr->use_empty())
    EraseDeadInstructions(Addr);
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeLoadStoreInst to sink their address computing into the block when
/// possible / profitable.
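///
/// For example (hypothetical IR), in:
///   call void asm "movl $0, %eax", "*m"(i32* %addr)
/// the indirect "m" constraint makes %addr a memory operand, so its address
/// computation can be sunk into this block just like a load or store address.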
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  bool MadeChange = false;
  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  std::vector<InlineAsm::ConstraintInfo>
    ConstraintInfos = IA->ParseConstraints();

  /// ConstraintOperands - Information about all of the constraints.
  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
    ConstraintOperands.
      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      if (OpInfo.isIndirect)
        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue(),
                                OpInfo.ConstraintType == TargetLowering::C_Memory);

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = OpInfo.CallOperandVal;
      MadeChange |= OptimizeLoadStoreInst(I, OpVal, OpVal->getType(),
                                          SunkAddrs);
    }
  }

  return MadeChange;
}

bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If both result of the {s|z}xt and its source are live out, rewrite all
  // other uses of the source with result of extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative.  We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block.  Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;

    MadeChange = true;
  }

  return MadeChange;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Split all critical edges where the dest block has a PHI and where the phi
  // has shared immediate operands.
  TerminatorInst *BBTI = BB.getTerminator();
  if (BBTI->getNumSuccessors() > 1) {
    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i)
      if (isa<PHINode>(BBTI->getSuccessor(i)->begin()) &&
          isCriticalEdge(BBTI, i, true))
        SplitEdgeNicely(BBTI, i, this);
  }


  // Keep track of non-local addresses that have been sunk into this block.
  // This allows us to avoid inserting duplicate code for blocks with multiple
  // load/stores of the same address.
  DenseMap<Value*, Value*> SunkAddrs;

  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
    Instruction *I = BBI++;

    if (CastInst *CI = dyn_cast<CastInst>(I)) {
      // If the source of the cast is a constant, then this should have
      // already been constant folded.  The only reason NOT to constant fold
      // it is if something (e.g. LSR) was careful to place the constant
      // evaluation in a block other than the one that uses it (e.g. to hoist
      // the address of globals out of a loop).  If this is the case, we don't
      // want to forward-subst the cast.
      if (isa<Constant>(CI->getOperand(0)))
        continue;

      bool Change = false;
      if (TLI) {
        Change = OptimizeNoopCopyExpression(CI, *TLI);
        MadeChange |= Change;
      }

      if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I)))
        MadeChange |= OptimizeExtUses(I);
    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
      MadeChange |= OptimizeCmpExpression(CI);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeLoadStoreInst(I, I->getOperand(0), LI->getType(),
                                            SunkAddrs);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeLoadStoreInst(I, SI->getOperand(1),
                                            SI->getOperand(0)->getType(),
                                            SunkAddrs);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      if (GEPI->hasAllZeroIndices()) {
        /// The GEP operand must be a pointer, so must its result -> BitCast
        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                          GEPI->getName(), GEPI);
        GEPI->replaceAllUsesWith(NC);
        GEPI->eraseFromParent();
        MadeChange = true;
        BBI = NC;
      }
    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
      // If we found an inline asm expression, and if the target knows how to
      // lower it to normal LLVM code, do so now.
      if (TLI && isa<InlineAsm>(CI->getCalledValue()))
        if (const TargetAsmInfo *TAI =
              TLI->getTargetMachine().getTargetAsmInfo()) {
          if (TAI->ExpandInlineAsm(CI))
            BBI = BB.begin();
          else
            // Sink address computing for memory operands into the block.
            MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
        }
    }
  }

  return MadeChange;
}