CodeGenPrepare.cpp revision 9ec8095485c994522c9a50e16fc029de94c20476
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
using namespace llvm;

namespace {
  cl::opt<bool> OptExtUses("optimize-ext-uses",
                           cl::init(true), cl::Hidden);
}

namespace {
  class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass((intptr_t)&ID), TLI(tli) {}
    bool runOnFunction(Function &F);

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeLoadStoreInst(Instruction *I, Value *Addr,
                               const Type *AccessTy,
                               DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                               DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeExtUses(Instruction *I);
  };
}

char CodeGenPrepare::ID = 0;
static RegisterPass<CodeGenPrepare> X("codegenprepare",
                                      "Optimize for code generation");

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}


bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= OptimizeBlock(*BB);
    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

/// EliminateMostlyEmptyBlocks - Eliminate blocks that contain only PHI nodes
/// and an unconditional branch. Passes before isel (e.g. LSR/loopsimplify)
/// often split edges in ways that are non-optimal for isel. Start by
/// eliminating these blocks so we can split them the way we want them.
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch isn't a phi node, then other stuff
    // is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      if (!isa<PHINode>(BBI)) continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor. If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block. If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {  // Common predecessor?
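      // Pred feeds both BB and DestBB, so every PHI in DestBB must see the
      // same value along the Pred edge and along the BB edge (after mapping
      // BB's own PHIs through Pred); otherwise merging would lose one of the
      // two values.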
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes
/// and an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB;

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (DestBB->getSinglePredecessor()) {
    // If DestBB has single-entry PHI nodes, fold them.
    while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
      PN->replaceAllUsesWith(PN->getIncomingValue(0));
      PN->eraseFromParent();
    }

    // Splice all the PHI nodes from BB over to DestBB.
    DestBB->getInstList().splice(DestBB->begin(), BB->getInstList(),
                                 BB->begin(), BI);

    // Anything that branched to BB now branches to DestBB.
    BB->replaceAllUsesWith(DestBB);

    // Nuke BB.
    BB->eraseFromParent();

    DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
    return;
  }

  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  BB->eraseFromParent();

  DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
}


/// SplitEdgeNicely - Split the critical edge from TI to its specified
/// successor if it will improve codegen. We only do this if the successor has
/// phi nodes (otherwise critical edges are ok). If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
/// instead of introducing a new block.
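///
/// An illustrative sketch: if TIBB ends in
///   br i1 %cond, label %Dest, label %Other
/// and Dest already has an empty predecessor Pred containing only
///   br label %Dest
/// whose PHI entries in Dest match TIBB's, we retarget the edge at Pred
/// instead of materializing a new block.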
static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) {
  BasicBlock *TIBB = TI->getParent();
  BasicBlock *Dest = TI->getSuccessor(SuccNum);
  assert(isa<PHINode>(Dest->begin()) &&
         "This should only be called if Dest has a PHI!");

  // As a hack, never split backedges of loops. Even though the copy for any
  // PHIs inserted on the backedge would be dead for exits from the loop, we
  // assume that the cost of *splitting* the backedge would be too high.
  if (Dest == TIBB)
    return;

  /// TIPHIValues - This array is lazily computed to determine the values of
  /// PHIs in Dest that TI would provide.
  SmallVector<Value*, 32> TIPHIValues;

  // Check to see if Dest has any blocks that can be used as a split edge for
  // this terminator.
  for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    // To be usable, the pred has to end with an uncond branch to the dest.
    BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
    if (!PredBr || !PredBr->isUnconditional() ||
        // Must be empty other than the branch.
        &Pred->front() != PredBr ||
        // Cannot be the entry block; its label does not get emitted.
        Pred == &(Dest->getParent()->getEntryBlock()))
      continue;

    // Finally, since we know that Dest has phi nodes in it, we have to make
    // sure that jumping to Pred will have the same effect as going to Dest in
    // terms of PHI values.
    PHINode *PN;
    unsigned PHINo = 0;
    bool FoundMatch = true;
    for (BasicBlock::iterator I = Dest->begin();
         (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
      if (PHINo == TIPHIValues.size())
        TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

      // If the PHI entry doesn't work, we can't use this pred.
      if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
        FoundMatch = false;
        break;
      }
    }

    // If we found a workable predecessor, change TI to branch to Pred instead.
    if (FoundMatch) {
      Dest->removePredecessor(TIBB);
      TI->setSuccessor(SuccNum, Pred);
      return;
    }
  }

  SplitCriticalEdge(TI, SuccNum, P, true);
}

/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, int->uint, or
/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  MVT::ValueType SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  MVT::ValueType DstVT = TLI.getValueType(CI->getType());

  // Is this an fp<->int conversion?
  if (MVT::isInteger(SrcVT) != MVT::isInteger(DstVT))
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT < DstVT) return false;

  // If these values will be promoted, find out what they will be promoted
  // to. This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
    SrcVT = TLI.getTypeToTransformTo(SrcVT);
  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
    DstVT = TLI.getTypeToTransformTo(DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
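  // (For example, on a target that promotes both i8 and i16 to i32, a
  // truncate from i16 to i8 is a noop copy once both types are promoted.)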
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in. For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      // PHI operands are stored as (value, block) pairs, so halving the
      // operand number yields the incoming-value index.
      unsigned OpVal = UI.getOperandNo()/2;
      UserBB = PN->getIncomingBlock(OpVal);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->begin();
      while (isa<PHINode>(InsertPt)) ++InsertPt;

      InsertedCast =
        CastInst::create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - Sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmps - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->begin();
      while (isa<PHINode>(InsertPt)) ++InsertPt;

      InsertedCmp =
        CmpInst::create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

/// EraseDeadInstructions - Erase V if it is a dead instruction, recursively
/// erasing any operands that become trivially dead as a result.
static void EraseDeadInstructions(Value *V) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || !I->use_empty()) return;

  SmallPtrSet<Instruction*, 16> Insts;
  Insts.insert(I);

  while (!Insts.empty()) {
    I = *Insts.begin();
    Insts.erase(I);
    if (isInstructionTriviallyDead(I)) {
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
        if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i)))
          Insts.insert(U);
      I->eraseFromParent();
    }
  }
}


/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode which
/// holds actual Value*'s for register values.
struct ExtAddrMode : public TargetLowering::AddrMode {
  Value *BaseReg;
  Value *ScaledReg;
  ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
  void dump() const;
};

static std::ostream &operator<<(std::ostream &OS, const ExtAddrMode &AM) {
  bool NeedPlus = false;
  OS << "[";
  if (AM.BaseGV)
    OS << (NeedPlus ? " + " : "")
       << "GV:%" << AM.BaseGV->getName(), NeedPlus = true;

  if (AM.BaseOffs)
    OS << (NeedPlus ? " + " : "") << AM.BaseOffs, NeedPlus = true;

  if (AM.BaseReg)
    OS << (NeedPlus ? " + " : "")
       << "Base:%" << AM.BaseReg->getName(), NeedPlus = true;
  if (AM.Scale)
    OS << (NeedPlus ? " + " : "")
       << AM.Scale << "*%" << AM.ScaledReg->getName(), NeedPlus = true;

  return OS << "]";
}

void ExtAddrMode::dump() const {
  cerr << *this << "\n";
}

static bool TryMatchingScaledValue(Value *ScaleReg, int64_t Scale,
                                   const Type *AccessTy, ExtAddrMode &AddrMode,
                                   SmallVector<Instruction*, 16> &AddrModeInsts,
                                   const TargetLowering &TLI, unsigned Depth);

/// FindMaximalLegalAddressingMode - If we can, try to merge the computation of
/// Addr into the specified addressing mode. If Addr can't be added to AddrMode
/// this returns false. This assumes that Addr is either a pointer type or
/// intptr_t for the target.
static bool FindMaximalLegalAddressingMode(Value *Addr, const Type *AccessTy,
                                           ExtAddrMode &AddrMode,
                                   SmallVector<Instruction*, 16> &AddrModeInsts,
                                           const TargetLowering &TLI,
                                           unsigned Depth) {
  // If this is a global variable, fold it into the addressing mode if possible.
  if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
    if (AddrMode.BaseGV == 0) {
      AddrMode.BaseGV = GV;
      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
        return true;
      AddrMode.BaseGV = 0;
    }
  } else if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
    AddrMode.BaseOffs += CI->getSExtValue();
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.BaseOffs -= CI->getSExtValue();
  } else if (isa<ConstantPointerNull>(Addr)) {
    return true;
  }

  // Look through constant exprs and instructions.
  unsigned Opcode = ~0U;
  User *AddrInst = 0;
  if (Instruction *I = dyn_cast<Instruction>(Addr)) {
    Opcode = I->getOpcode();
    AddrInst = I;
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
    Opcode = CE->getOpcode();
    AddrInst = CE;
  }

  // Limit recursion to avoid exponential behavior.
  if (Depth == 5) { AddrInst = 0; Opcode = ~0U; }

  // If this is really an instruction, add it to our list of related
  // instructions.
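  // (If the match ultimately fails, it is popped back off at the bottom of
  // this function, keeping AddrModeInsts balanced.)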
  if (Instruction *I = dyn_cast_or_null<Instruction>(AddrInst))
    AddrModeInsts.push_back(I);

  switch (Opcode) {
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                       AddrMode, AddrModeInsts, TLI, Depth))
      return true;
    break;
  case Instruction::IntToPtr:
    // This inttoptr is a no-op if the integer type is pointer sized.
    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
        TLI.getPointerTy()) {
      if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                         AddrMode, AddrModeInsts, TLI, Depth))
        return true;
    }
    break;
  case Instruction::Add: {
    // Check to see if we can merge in the RHS then the LHS. If so, we win.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();
    if (FindMaximalLegalAddressingMode(AddrInst->getOperand(1), AccessTy,
                                       AddrMode, AddrModeInsts, TLI, Depth+1) &&
        FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                       AddrMode, AddrModeInsts, TLI, Depth+1))
      return true;

    // Restore the old addr mode info.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);

    // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
    if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                       AddrMode, AddrModeInsts, TLI, Depth+1) &&
        FindMaximalLegalAddressingMode(AddrInst->getOperand(1), AccessTy,
                                       AddrMode, AddrModeInsts, TLI, Depth+1))
      return true;

    // Otherwise we definitely can't merge the ADD in.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);
    break;
  }
  case Instruction::Or: {
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS) break;
    // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
    break;
  }
  case Instruction::Mul:
  case Instruction::Shl: {
    // Can only handle X*C and X << C, and can only handle this when the scale
    // field is available.
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS) break;
    int64_t Scale = RHS->getSExtValue();
    if (Opcode == Instruction::Shl)
      Scale = 1 << Scale;

    if (TryMatchingScaledValue(AddrInst->getOperand(0), Scale, AccessTy,
                               AddrMode, AddrModeInsts, TLI, Depth))
      return true;
    break;
  }
  case Instruction::GetElementPtr: {
    // Scan the GEP, checking whether it contains only constant offsets and at
    // most one variable offset.
    int VariableOperand = -1;
    unsigned VariableScale = 0;

    int64_t ConstantOffset = 0;
    const TargetData *TD = TLI.getTargetData();
    gep_type_iterator GTI = gep_type_begin(AddrInst);
    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD->getStructLayout(STy);
        unsigned Idx =
          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
        ConstantOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t TypeSize = TD->getABITypeSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
          ConstantOffset += CI->getSExtValue()*TypeSize;
        } else if (TypeSize) {  // Scales of zero don't do anything.
          // We only allow one variable index at the moment.
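          // A second variable index is recorded as VariableOperand == -2,
          // which causes the whole GEP to be rejected below.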
          if (VariableOperand != -1) {
            VariableOperand = -2;
            break;
          }

          // Remember the variable index.
          VariableOperand = i;
          VariableScale = TypeSize;
        }
      }
    }

    // If the GEP had multiple variable indices, punt.
    if (VariableOperand == -2)
      break;

    // A common case is for the GEP to only do a constant offset. In this case,
    // just add it to the disp field and check validity.
    if (VariableOperand == -1) {
      AddrMode.BaseOffs += ConstantOffset;
      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)) {
        // Check to see if we can fold the base pointer in too.
        if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                           AddrMode, AddrModeInsts, TLI,
                                           Depth+1))
          return true;
      }
      AddrMode.BaseOffs -= ConstantOffset;
    } else {
      // Check that this has no base reg yet. If it does, we won't have a place
      // to put the base of the GEP (assuming it is not a null ptr).
      bool SetBaseReg = false;
      if (AddrMode.HasBaseReg) {
        if (!isa<ConstantPointerNull>(AddrInst->getOperand(0)))
          break;
      } else {
        AddrMode.HasBaseReg = true;
        AddrMode.BaseReg = AddrInst->getOperand(0);
        SetBaseReg = true;
      }

      // See if the scale amount is valid for this target.
      AddrMode.BaseOffs += ConstantOffset;
      if (TryMatchingScaledValue(AddrInst->getOperand(VariableOperand),
                                 VariableScale, AccessTy, AddrMode,
                                 AddrModeInsts, TLI, Depth)) {
        if (!SetBaseReg) return true;

        // If this match succeeded, we know that we can form an address with the
        // GepBase as the basereg. See if we can match *more*.
        AddrMode.HasBaseReg = false;
        AddrMode.BaseReg = 0;
        if (FindMaximalLegalAddressingMode(AddrInst->getOperand(0), AccessTy,
                                           AddrMode, AddrModeInsts, TLI,
                                           Depth+1))
          return true;
        // Strange, shouldn't happen. Restore the base reg and succeed the easy
        // way.
        AddrMode.HasBaseReg = true;
        AddrMode.BaseReg = AddrInst->getOperand(0);
        return true;
      }

      AddrMode.BaseOffs -= ConstantOffset;
      if (SetBaseReg) {
        AddrMode.HasBaseReg = false;
        AddrMode.BaseReg = 0;
      }
    }
    break;
  }
  }

  if (Instruction *I = dyn_cast_or_null<Instruction>(AddrInst)) {
    assert(AddrModeInsts.back() == I && "Stack imbalance");
    AddrModeInsts.pop_back();
  }

  // Worst case, the target should support [reg] addressing modes. :)
  if (!AddrMode.HasBaseReg) {
    AddrMode.HasBaseReg = true;
    // Still check for legality in case the target supports [imm] but not [i+r].
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) {
      AddrMode.BaseReg = Addr;
      return true;
    }
    AddrMode.HasBaseReg = false;
  }

  // If the base register is already taken, see if we can do [r+r].
  if (AddrMode.Scale == 0) {
    AddrMode.Scale = 1;
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) {
      AddrMode.ScaledReg = Addr;
      return true;
    }
    AddrMode.Scale = 0;
  }
  // Couldn't match.
  return false;
}

/// TryMatchingScaledValue - Try adding ScaleReg*Scale to the specified
/// addressing mode. Return true if this addr mode is legal for the target,
/// false if not.
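///
/// For example, matching ScaleReg*4 when AddrMode already holds ScaleReg*3
/// combines into ScaleReg*7; and when ScaleReg is itself (X + C), the body
/// below also tries to fold C*Scale into the base offset so X can be scaled
/// directly.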
static bool TryMatchingScaledValue(Value *ScaleReg, int64_t Scale,
                                   const Type *AccessTy, ExtAddrMode &AddrMode,
                                   SmallVector<Instruction*, 16> &AddrModeInsts,
                                   const TargetLowering &TLI, unsigned Depth) {
  // If we already have a scale of this value, we can add to it, otherwise, we
  // need an available scale field.
  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
    return false;

  ExtAddrMode InputAddrMode = AddrMode;

  // Add scale to turn X*4+X*3 -> X*7. This could also do things like
  // [A+B + A*7] -> [B+A*8].
  AddrMode.Scale += Scale;
  AddrMode.ScaledReg = ScaleReg;

  if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) {
    // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
    // to see if ScaleReg is actually X+C. If so, we can turn this into adding
    // X*Scale + C*Scale to addr mode.
    BinaryOperator *BinOp = dyn_cast<BinaryOperator>(ScaleReg);
    if (BinOp && BinOp->getOpcode() == Instruction::Add &&
        isa<ConstantInt>(BinOp->getOperand(1)) &&
        InputAddrMode.ScaledReg == 0) {
      InputAddrMode.Scale = Scale;
      InputAddrMode.ScaledReg = BinOp->getOperand(0);
      InputAddrMode.BaseOffs +=
        cast<ConstantInt>(BinOp->getOperand(1))->getSExtValue()*Scale;
      if (TLI.isLegalAddressingMode(InputAddrMode, AccessTy)) {
        AddrModeInsts.push_back(BinOp);
        AddrMode = InputAddrMode;
        return true;
      }
    }

    // Otherwise, not (x+c)*scale, just return what we have.
    return true;
  }

  // Otherwise, back this attempt out.
  AddrMode.Scale -= Scale;
  if (AddrMode.Scale == 0) AddrMode.ScaledReg = 0;

  return false;
}


/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeLoadStoreInst - Load and store instructions often have addressing
/// modes that can do significant amounts of computation. As such, instruction
/// selection will try to get the load or store to do as much computation as
/// possible for the program. The problem is that isel can only see within a
/// single block. As such, we sink as much legal addressing mode stuff into the
/// block as possible.
bool CodeGenPrepare::OptimizeLoadStoreInst(Instruction *LdStInst, Value *Addr,
                                           const Type *AccessTy,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  // Figure out what addressing mode will be built up for this operation.
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode;
  bool Success = FindMaximalLegalAddressingMode(Addr, AccessTy, AddrMode,
                                                AddrModeInsts, *TLI, 0);
  Success = Success;  // Silence unused-variable warnings in release builds.
  assert(Success && "Couldn't select *anything*?");

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], LdStInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(cerr << "CGP: Found local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user. Since our caller is
  // scanning from the top of the BB to the bottom, reuses of the expr are
  // guaranteed to happen later.
  BasicBlock::iterator InsertPt = LdStInst;

  // Now that we have determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see whether we have
  // already done this for some other load/store instr in this block. If so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << "\n");
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
  } else {
    DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << "\n");
    const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType();

    Value *Result = 0;
    // Start with the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (isa<PointerType>(V->getType())) {
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else {
        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
      }
      if (AddrMode.Scale != 1)
        V = BinaryOperator::createMul(V, ConstantInt::get(IntPtrTy,
                                                          AddrMode.Scale),
                                      "sunkaddr", InsertPt);
      Result = V;
    }

    // Add in the base register.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType() != IntPtrTy)
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      if (Result)
        Result = BinaryOperator::createAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
                                  InsertPt);
      if (Result)
        Result = BinaryOperator::createAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = BinaryOperator::createAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr", InsertPt);
  }

  LdStInst->replaceUsesOfWith(Addr, SunkAddr);

  if (Addr->use_empty())
    EraseDeadInstructions(Addr);
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeLoadStoreInst to sink their address computation into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  bool MadeChange = false;
  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  std::vector<InlineAsm::ConstraintInfo>
    ConstraintInfos = IA->ParseConstraints();

  /// ConstraintOperands - Information about all of the constraints.
  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
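  // Note: ArgNo advances only for constraints that consume an actual call
  // argument (indirect outputs and inputs, handled in the switch below).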
  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
    ConstraintOperands.
      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Compute the value bound to this operand, if any.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      if (OpInfo.isIndirect)
        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Compute the constraint code and ConstraintType to use.
    OpInfo.ComputeConstraintToUse(*TLI);

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = OpInfo.CallOperandVal;
      MadeChange |= OptimizeLoadStoreInst(I, OpVal, OpVal->getType(),
                                          SunkAddrs);
    }
  }

  return MadeChange;
}

bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If both the result of the {s|z}ext and its source are live out, rewrite
  // all other uses of the source with the result of the extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (!TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block once.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->begin();
      while (isa<PHINode>(InsertPt)) ++InsertPt;

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;

    MadeChange = true;
  }

  return MadeChange;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Split all critical edges where the dest block has a PHI and where the phi
  // has shared immediate operands.
  TerminatorInst *BBTI = BB.getTerminator();
  if (BBTI->getNumSuccessors() > 1) {
    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i)
      if (isa<PHINode>(BBTI->getSuccessor(i)->begin()) &&
          isCriticalEdge(BBTI, i, true))
        SplitEdgeNicely(BBTI, i, this);
  }


  // Keep track of non-local addresses that have been sunk into this block.
  // This allows us to avoid inserting duplicate code for blocks with multiple
  // load/stores of the same address.
  DenseMap<Value*, Value*> SunkAddrs;

  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
    Instruction *I = BBI++;

    if (CastInst *CI = dyn_cast<CastInst>(I)) {
      // If the source of the cast is a constant, then this should have
      // already been constant folded. The only reason NOT to constant fold
      // it is if something (e.g. LSR) was careful to place the constant
      // evaluation in a block other than the one that uses it (e.g. to hoist
      // the address of globals out of a loop). If this is the case, we don't
      // want to forward-subst the cast.
      if (isa<Constant>(CI->getOperand(0)))
        continue;

      bool Change = false;
      if (TLI) {
        Change = OptimizeNoopCopyExpression(CI, *TLI);
        MadeChange |= Change;
      }

      if (OptExtUses && !Change && (isa<ZExtInst>(I) || isa<SExtInst>(I)))
        MadeChange |= OptimizeExtUses(I);
    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
      MadeChange |= OptimizeCmpExpression(CI);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeLoadStoreInst(I, I->getOperand(0), LI->getType(),
                                            SunkAddrs);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeLoadStoreInst(I, SI->getOperand(1),
                                            SI->getOperand(0)->getType(),
                                            SunkAddrs);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      if (GEPI->hasAllZeroIndices()) {
        /// The GEP operand must be a pointer, so must its result -> BitCast
        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                          GEPI->getName(), GEPI);
        GEPI->replaceAllUsesWith(NC);
        GEPI->eraseFromParent();
        MadeChange = true;
        BBI = NC;
      }
    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
      // If we found an inline asm expression, and if the target knows how to
      // lower it to normal LLVM code, do so now.
      if (TLI && isa<InlineAsm>(CI->getCalledValue()))
        if (const TargetAsmInfo *TAI =
              TLI->getTargetMachine().getTargetAsmInfo()) {
          if (TAI->ExpandInlineAsm(CI))
            BBI = BB.begin();
          else
            // Sink address computing for memory operands into the block.
            MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
        }
    }
  }

  return MadeChange;
}