InlineCost.cpp revision fe45fd084db872f9c7106c26e52c1cc8c9cba3a5
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The LLVM Compiler Infrastructure 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// This file is distributed under the University of Illinois Open Source 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// License. See LICENSE.TXT for details. 72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// 82a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===// 92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// 102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// This file implements inline cost analysis. 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===// 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DEBUG_TYPE "inline-cost" 15868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/Analysis/InlineCost.h" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/ADT/STLExtras.h" 17868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/ADT/SetVector.h" 18868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/ADT/SmallPtrSet.h" 19868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/ADT/SmallVector.h" 20868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/ADT/Statistic.h" 212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Analysis/ConstantFolding.h" 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Analysis/InstructionSimplify.h" 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Analysis/TargetTransformInfo.h" 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/IR/CallingConv.h" 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/IR/DataLayout.h" 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/IR/GlobalAlias.h" 2790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#include "llvm/IR/IntrinsicInst.h" 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/IR/Operator.h" 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/InstVisitor.h" 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/CallSite.h" 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/Debug.h" 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/GetElementPtrTypeIterator.h" 332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Support/raw_ostream.h" 342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using namespace llvm; 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace { 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { 422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) typedef InstVisitor<CallAnalyzer, bool> Base; 432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) friend class InstVisitor<CallAnalyzer, bool>; 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // DataLayout if available, or null. 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const DataLayout *const TD; 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// The TargetTransformInfo available for this compilation. 492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const TargetTransformInfo &TTI; 502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // The called function. 522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Function &F; 532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int Threshold; 552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int Cost; 562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool IsCallerRecursive; 582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool IsRecursiveCall; 592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool ExposesReturnsTwice; 602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool HasDynamicAlloca; 612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool ContainsNoDuplicateCall; 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /// Number of bytes allocated statically by the callee. 642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) uint64_t AllocatedSize; 652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) unsigned NumInstructions, NumVectorInstructions; 662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int FiftyPercentVectorBonus, TenPercentVectorBonus; 672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int VectorBonus; 682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // While we walk the potentially-inlined instructions, we build up and 702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // maintain a mapping of simplified values specific to this callsite. The 712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // idea is to propagate any special information we have about arguments to 722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // this call through the inlinable section of the function, and account for 732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // likely simplifications post-inlining. The most important aspect we track 742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // is CFG altering simplifications -- when we prove a basic block dead, that 752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // can cause dramatic shifts in the cost of inlining a function. 762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, Constant *> SimplifiedValues; 772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Keep track of the values which map back (through function arguments) to 792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // allocas on the caller stack which could be simplified through SROA. 802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, Value *> SROAArgValues; 812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // The mapping of caller Alloca values to their accumulated cost savings. If 832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // we have to disable SROA for one of the allocas, this tells us how much 842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // cost must be added. 852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, int> SROAArgCosts; 862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Keep track of values which map to a pointer base and constant offset. 882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs; 892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Custom simplification helper routines. 912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool isAllocaDerivedArg(Value *V); 922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool lookupSROAArgAndCost(Value *V, Value *&Arg, 932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, int>::iterator &CostIt); 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void disableSROA(DenseMap<Value *, int>::iterator CostIt); 952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) void disableSROA(Value *V); 962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, 972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int InstructionCost); 982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool handleSROACandidate(bool IsSROAValid, 992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, int>::iterator CostIt, 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int InstructionCost); 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool isGEPOffsetConstant(GetElementPtrInst &GEP); 1022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); 1032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool simplifyCallSite(Function *F, CallSite CS); 1042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Custom analysis routines. 1072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool analyzeBlock(BasicBlock *BB); 1082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Disable several entry points to the visitor so we don't accidentally use 1102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // them by declaring but not defining them here. 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void visit(Module *); void visit(Module &); 1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) void visit(Function *); void visit(Function &); 1132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) void visit(BasicBlock *); void visit(BasicBlock &); 1142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Provide base case for our instruction visit. 1162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool visitInstruction(Instruction &I); 1172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Our visit overrides. 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool visitAlloca(AllocaInst &I); 1202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool visitPHI(PHINode &I); 1212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool visitGetElementPtr(GetElementPtrInst &I); 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool visitBitCast(BitCastInst &I); 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool visitPtrToInt(PtrToIntInst &I); 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool visitIntToPtr(IntToPtrInst &I); 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool visitCastInst(CastInst &I); 1262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool visitUnaryInstruction(UnaryInstruction &I); 1272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool visitCmpInst(CmpInst &I); 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool visitSub(BinaryOperator &I); 129b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) bool visitBinaryOperator(BinaryOperator &I); 1302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool visitLoad(LoadInst &I); 1312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool visitStore(StoreInst &I); 1322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool visitExtractValue(ExtractValueInst &I); 1332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool visitInsertValue(InsertValueInst &I); 1342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool visitCallSite(CallSite CS); 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)public: 1372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, 1382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Function &Callee, int Threshold) 1392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), 1402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) IsCallerRecursive(false), IsRecursiveCall(false), 1412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ExposesReturnsTwice(false), HasDynamicAlloca(false), 1422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), 1432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) NumVectorInstructions(0), FiftyPercentVectorBonus(0), 1442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), 1452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), 1462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) NumConstantPtrDiffs(0), NumInstructionsSimplified(0), 1472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SROACostSavings(0), SROACostSavingsLost(0) {} 1482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool analyzeCall(CallSite CS); 1502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 151b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) int getThreshold() { return Threshold; } 1522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int getCost() { return Cost; } 1532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 154c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Keep a bunch of stats about the cost savings found so we can print them 155c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // out when debugging. 156c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned NumConstantArgs; 1572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) unsigned NumConstantOffsetPtrArgs; 1582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) unsigned NumAllocaArgs; 159b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles) unsigned NumConstantPtrCmps; 160c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned NumConstantPtrDiffs; 161c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned NumInstructionsSimplified; 162c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned SROACostSavings; 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned SROACostSavingsLost; 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) void dump(); 1662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}; 167c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 1682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} // namespace 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// \brief Test whether the given value is an Alloca-derived function argument. 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool CallAnalyzer::isAllocaDerivedArg(Value *V) { 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return SROAArgValues.count(V); 1732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 1742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. 1762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// Returns false if V does not map to a SROA-candidate. 1772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::lookupSROAArgAndCost( 1782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) { 1792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (SROAArgValues.empty() || SROAArgCosts.empty()) 1802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 1812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V); 1832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (ArgIt == SROAArgValues.end()) 1842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 1852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Arg = ArgIt->second; 1872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) CostIt = SROAArgCosts.find(Arg); 1882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return CostIt != SROAArgCosts.end(); 1892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 1902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Disable SROA for the candidate marked by this cost iterator. 1922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// 1932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// This marks the candidate as no longer viable for SROA, and adds the cost 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// savings associated with it back into the inline cost measurement. 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) { 1962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // If we're no longer able to perform SROA we need to undo its cost savings 1972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // and prevent subsequent analysis. 1982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Cost += CostIt->second; 1992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SROACostSavings -= CostIt->second; 2002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SROACostSavingsLost += CostIt->second; 2012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SROAArgCosts.erase(CostIt); 2022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 2032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief If 'V' maps to a SROA candidate, disable SROA for it. 2052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void CallAnalyzer::disableSROA(Value *V) { 2062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *SROAArg; 2072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, int>::iterator CostIt; 2082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (lookupSROAArgAndCost(V, SROAArg, CostIt)) 2092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) disableSROA(CostIt); 2102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 2112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Accumulate the given cost for a particular SROA candidate. 2132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, 2142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int InstructionCost) { 2152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) CostIt->second += InstructionCost; 2162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SROACostSavings += InstructionCost; 2172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 2182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Helper for the common pattern of handling a SROA candidate. 2202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// Either accumulates the cost savings if the SROA remains valid, or disables 2212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// SROA for the candidate. 2222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, 2232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, int>::iterator CostIt, 2242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int InstructionCost) { 2252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (IsSROAValid) { 2262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) accumulateSROACost(CostIt, InstructionCost); 2272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 2282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 2292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) disableSROA(CostIt); 2312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 2322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 2332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Check whether a GEP's indices are all constant. 2352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// 2362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// Respects any simplified values known during the analysis of this callsite. 237868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { 238868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) 2392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I)) 2402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 2412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 2432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 2442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Accumulate a constant GEP offset into an APInt if possible. 2462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// 2472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// Returns false if unable to compute the offset for any reason. Respects any 2482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// simplified values known during the analysis of this callsite. 2492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { 2502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!TD) 2512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 2522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned IntPtrWidth = TD->getPointerSizeInBits(); 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert(IntPtrWidth == Offset.getBitWidth()); 255c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 256c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); 257c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) GTI != GTE; ++GTI) { 258eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); 259c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!OpC) 260c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) 261c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) OpC = dyn_cast<ConstantInt>(SimpleOp); 262c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (!OpC) 263c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return false; 264c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (OpC->isZero()) continue; 265c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 266c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Handle a struct index, which adds its field offset to the pointer. 267c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (StructType *STy = dyn_cast<StructType>(*GTI)) { 268c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned ElementIdx = OpC->getZExtValue(); 269c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const StructLayout *SL = TD->getStructLayout(STy); 2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); 2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; 2762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 2772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 2782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 2792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitAlloca(AllocaInst &I) { 2812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // FIXME: Check whether inlining will turn a dynamic alloca into a static 2822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // alloca, and handle that case. 2832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Accumulate the allocated size. 2852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (I.isStaticAlloca()) { 2862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Type *Ty = I.getAllocatedType(); 2872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : 2882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Ty->getPrimitiveSizeInBits()); 2897dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch } 2902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // We will happily inline static alloca instructions. 2922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (I.isStaticAlloca()) 2932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return Base::visitAlloca(I); 2947dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch 2957dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch // FIXME: This is overly conservative. Dynamic allocas are inefficient for 296c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // a variety of reasons, and so we would like to not inline them into 2972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // functions which don't currently have a dynamic alloca. This simply 2982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // disables inlining altogether in the presence of a dynamic alloca. 2992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) HasDynamicAlloca = true; 3002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 3012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 3022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitPHI(PHINode &I) { 3042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // FIXME: We should potentially be tracking values through phi nodes, 3052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // especially when they collapse to a single value due to deleted CFG edges 3062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // during inlining. 3077dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch 3087dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch // FIXME: We need to propagate SROA *disabling* through phi nodes, even 3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // though we don't want to propagate it's bonuses. The idea is to disable 3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // SROA if it *might* be used in an inappropriate manner. 3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Phi nodes are always zero-cost. 3132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 3142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 3152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { 3172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *SROAArg; 3182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, int>::iterator CostIt; 3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), 3202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SROAArg, CostIt); 3212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Try to fold GEPs of constant-offset call site argument pointers. This 3232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // requires target data and inbounds GEPs. 3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (TD && I.isInBounds()) { 3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Check if we have a base + offset for the pointer. 3262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *Ptr = I.getPointerOperand(); 3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); 3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (BaseAndOffset.first) { 3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Check if the offset of this GEP is constant, and if so accumulate it 3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // into Offset. 3312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) { 3322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Non-constant GEPs aren't folded, and disable SROA. 3332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (SROACandidate) 3342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) disableSROA(CostIt); 3352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 3362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 3372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Add the result as a new mapping to Base + Offset. 3392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ConstantOffsetPtrs[&I] = BaseAndOffset; 3402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Also handle SROA candidates here, we already know that the GEP is 3422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // all-constant indexed. 3432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (SROACandidate) 3442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SROAArgValues[&I] = SROAArg; 3452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (isGEPOffsetConstant(I)) { 3512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (SROACandidate) 3522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SROAArgValues[&I] = SROAArg; 3532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Constant GEPs are modeled as free. 3552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 3562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 3572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Variable GEPs will require math and will disable SROA. 3592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (SROACandidate) 3602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) disableSROA(CostIt); 3612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 3622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 3632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitBitCast(BitCastInst &I) { 3652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Propagate constants through bitcasts. 3662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *COp = dyn_cast<Constant>(I.getOperand(0)); 3672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!COp) 3682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) COp = SimplifiedValues.lookup(I.getOperand(0)); 3692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (COp) 3702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { 3712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SimplifiedValues[&I] = C; 37290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) return true; 37390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) } 37490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Track base/offsets through casts 3762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) std::pair<Value *, APInt> BaseAndOffset 3772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) = ConstantOffsetPtrs.lookup(I.getOperand(0)); 3782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Casts don't change the offset, just wrap it up. 3792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (BaseAndOffset.first) 3802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ConstantOffsetPtrs[&I] = BaseAndOffset; 3812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Also look for SROA candidates here. 3832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *SROAArg; 3842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, int>::iterator CostIt; 3852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) 3862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SROAArgValues[&I] = SROAArg; 3872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Bitcasts are always zero cost. 3892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 3902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 3912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { 3932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Propagate constants through ptrtoint. 3942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *COp = dyn_cast<Constant>(I.getOperand(0)); 3952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!COp) 3962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) COp = SimplifiedValues.lookup(I.getOperand(0)); 3972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (COp) 3982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { 3992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SimplifiedValues[&I] = C; 4002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 4012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 4022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Track base/offset pairs when converted to a plain integer provided the 4042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // integer is large enough to represent the pointer. 4052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) unsigned IntegerSize = I.getType()->getScalarSizeInBits(); 4062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (TD && IntegerSize >= TD->getPointerSizeInBits()) { 407868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) std::pair<Value *, APInt> BaseAndOffset 408868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) = ConstantOffsetPtrs.lookup(I.getOperand(0)); 4092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (BaseAndOffset.first) 4102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ConstantOffsetPtrs[&I] = BaseAndOffset; 4112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 4122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // This is really weird. Technically, ptrtoint will disable SROA. However, 4142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // unless that ptrtoint is *used* somewhere in the live basic blocks after 4152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // inlining, it will be nuked, and SROA should proceed. All of the uses which 4162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // would block SROA would also block SROA if applied directly to a pointer, 4172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // and so we can just add the integer in here. The only places where SROA is 4182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // preserved either cannot fire on an integer, or won't in-and-of themselves 4192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // disable SROA (ext) w/o some later use that we would see and disable. 4202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *SROAArg; 4212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, int>::iterator CostIt; 4222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) 4232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SROAArgValues[&I] = SROAArg; 4242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); 4262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 4272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { 4292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Propagate constants through ptrtoint. 4302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *COp = dyn_cast<Constant>(I.getOperand(0)); 4312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!COp) 4322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) COp = SimplifiedValues.lookup(I.getOperand(0)); 4332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (COp) 4342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { 4352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SimplifiedValues[&I] = C; 4362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 4372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 4382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Track base/offset pairs when round-tripped through a pointer without 4407dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch // modifications provided the integer is not too large. 4412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *Op = I.getOperand(0); 4427dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); 4432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (TD && IntegerSize <= TD->getPointerSizeInBits()) { 4442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op); 4452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (BaseAndOffset.first) 4462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ConstantOffsetPtrs[&I] = BaseAndOffset; 4472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 4482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // "Propagate" SROA here in the same manner as we do for ptrtoint above. 4507dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch Value *SROAArg; 4512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, int>::iterator CostIt; 4522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) 4532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SROAArgValues[&I] = SROAArg; 4542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); 4562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 4572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitCastInst(CastInst &I) { 4592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Propagate constants through ptrtoint. 4602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *COp = dyn_cast<Constant>(I.getOperand(0)); 4612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!COp) 4622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) COp = SimplifiedValues.lookup(I.getOperand(0)); 4632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (COp) 4642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { 4652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SimplifiedValues[&I] = C; 4662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 4672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 4682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. 4702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) disableSROA(I.getOperand(0)); 4712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); 4732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 4742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { 4762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *Operand = I.getOperand(0); 4772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *COp = dyn_cast<Constant>(Operand); 4782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!COp) 4792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) COp = SimplifiedValues.lookup(Operand); 4802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (COp) 4812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), 4822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) COp, TD)) { 4832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SimplifiedValues[&I] = C; 4842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 4852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 4862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Disable any SROA on the argument to arbitrary unary operators. 4882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) disableSROA(Operand); 4892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 4912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 4922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitCmpInst(CmpInst &I) { 4942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 4952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // First try to handle simplified comparisons. 4962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!isa<Constant>(LHS)) 4972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) 4982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LHS = SimpleLHS; 4992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!isa<Constant>(RHS)) 5002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) 5012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) RHS = SimpleRHS; 5022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *CLHS = dyn_cast<Constant>(LHS)) { 5032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *CRHS = dyn_cast<Constant>(RHS)) 5042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { 5052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SimplifiedValues[&I] = C; 5062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 5072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 5082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 5092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 5102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (I.getOpcode() == Instruction::FCmp) 5112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 5122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 5132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Otherwise look for a comparison between constant offset pointers with 5142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // a common base. 5152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *LHSBase, *RHSBase; 5162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) APInt LHSOffset, RHSOffset; 5172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); 5182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (LHSBase) { 5192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); 5202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (RHSBase && LHSBase == RHSBase) { 5212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // We have common bases, fold the icmp to a constant based on the 5222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // offsets. 5232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); 5242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); 5252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { 5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SimplifiedValues[&I] = C; 5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++NumConstantPtrCmps; 5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 5312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 5322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 5332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // If the comparison is an equality comparison with null, we can simplify it 5342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // for any alloca-derived argument. 5352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1))) 5362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (isAllocaDerivedArg(I.getOperand(0))) { 5372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // We can actually predict the result of comparisons between an 5382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // alloca-derived value and null. Note that this fires regardless of 5392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // SROA firing. 5402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; 5412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) 5422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) : ConstantInt::getFalse(I.getType()); 5432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 5442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 5452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 5462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Finally check for SROA candidates in comparisons. 5472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *SROAArg; 5482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DenseMap<Value *, int>::iterator CostIt; 5492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { 5502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (isa<ConstantPointerNull>(I.getOperand(1))) { 5512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) accumulateSROACost(CostIt, InlineConstants::InstrCost); 5522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 5532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 5542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 5552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) disableSROA(CostIt); 5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 5592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 560eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch 5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool CallAnalyzer::visitSub(BinaryOperator &I) { 5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Try to handle a special case: we can fold computing the difference of two 5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // constant-related pointers. 5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Value *LHSBase, *RHSBase; 5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) APInt LHSOffset, RHSOffset; 5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); 5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (LHSBase) { 5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); 5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (RHSBase && LHSBase == RHSBase) { 5712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // We have common bases, fold the subtract to a constant based on the 5722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // offsets. 5732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); 5742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); 5752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { 5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SimplifiedValues[&I] = C; 5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++NumConstantPtrDiffs; 5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 5792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Otherwise, fall back to the generic logic for simplifying and handling 5842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // instructions. 5852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return Base::visitSub(I); 5862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 5872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 5882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { 5892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 5902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!isa<Constant>(LHS)) 5912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) 5922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LHS = SimpleLHS; 5932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!isa<Constant>(RHS)) 5942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) 5952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) RHS = SimpleRHS; 5962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); 5972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) { 5982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SimplifiedValues[&I] = C; 5992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 6002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Disable any SROA on arguments to arbitrary, unsimplified binary operators. 6032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) disableSROA(LHS); 6042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) disableSROA(RHS); 60590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 60690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) return false; 60790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)} 6082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 6092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitLoad(LoadInst &I) { 6102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Value *SROAArg; 6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DenseMap<Value *, int>::iterator CostIt; 6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { 6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (I.isSimple()) { 6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) accumulateSROACost(CostIt, InlineConstants::InstrCost); 6152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) disableSROA(CostIt); 6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 6212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 6222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 6232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 6242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitStore(StoreInst &I) { 6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Value *SROAArg; 6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DenseMap<Value *, int>::iterator CostIt; 6272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { 6282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (I.isSimple()) { 6292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) accumulateSROACost(CostIt, InlineConstants::InstrCost); 6302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) disableSROA(CostIt); 6342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 6372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { 6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Constant folding for extract value is trivial. 6412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *C = dyn_cast<Constant>(I.getAggregateOperand()); 6422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!C) 6432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) C = SimplifiedValues.lookup(I.getAggregateOperand()); 6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (C) { 6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); 6462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 6472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 6482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 6492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // SROA can look through these but give them a cost. 6505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 6522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { 6542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Constant folding for insert value is trivial. 6555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand()); 6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!AggC) 6572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) AggC = SimplifiedValues.lookup(I.getAggregateOperand()); 6582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand()); 6592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!InsertedC) 6602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); 6612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (AggC && InsertedC) { 6622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, 6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) I.getIndices()); 6642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 6652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // SROA can look through these but give them a cost. 6682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 6692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 6702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 6712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Try to simplify a call site. 6722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// 6732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// Takes a concrete function and callsite and tries to actually simplify it by 6742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// analyzing the arguments and call itself with instsimplify. Returns true if 6752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// it has simplified the callsite to some other entity (a constant), making it 6762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// free. 6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { 6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // FIXME: Using the instsimplify logic directly for this is inefficient 6792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // because we have to continually rebuild the argument list even when no 6802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // simplifications can be performed. Until that is fixed with remapping 6812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // inside of instsimplify, directly constant fold calls here. 6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!canConstantFoldCallTo(F)) 6832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 6842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Try to re-map the arguments to constants. 6862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SmallVector<Constant *, 4> ConstantArgs; 6872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ConstantArgs.reserve(CS.arg_size()); 6882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); 6892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) I != E; ++I) { 6902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Constant *C = dyn_cast<Constant>(*I); 6912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!C) 6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I)); 6932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!C) 6942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; // This argument doesn't map to a constant. 6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ConstantArgs.push_back(C); 6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { 6992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SimplifiedValues[CS.getInstruction()] = C; 7002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 7012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return false; 7042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 706bool CallAnalyzer::visitCallSite(CallSite CS) { 707 if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() && 708 !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, 709 Attribute::ReturnsTwice)) { 710 // This aborts the entire analysis. 711 ExposesReturnsTwice = true; 712 return false; 713 } 714 if (CS.isCall() && 715 cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) 716 ContainsNoDuplicateCall = true; 717 718 if (Function *F = CS.getCalledFunction()) { 719 // When we have a concrete function, first try to simplify it directly. 720 if (simplifyCallSite(F, CS)) 721 return true; 722 723 // Next check if it is an intrinsic we know about. 724 // FIXME: Lift this into part of the InstVisitor. 725 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { 726 switch (II->getIntrinsicID()) { 727 default: 728 return Base::visitCallSite(CS); 729 730 case Intrinsic::memset: 731 case Intrinsic::memcpy: 732 case Intrinsic::memmove: 733 // SROA can usually chew through these intrinsics, but they aren't free. 734 return false; 735 } 736 } 737 738 if (F == CS.getInstruction()->getParent()->getParent()) { 739 // This flag will fully abort the analysis, so don't bother with anything 740 // else. 741 IsRecursiveCall = true; 742 return false; 743 } 744 745 if (TTI.isLoweredToCall(F)) { 746 // We account for the average 1 instruction per call argument setup 747 // here. 748 Cost += CS.arg_size() * InlineConstants::InstrCost; 749 750 // Everything other than inline ASM will also have a significant cost 751 // merely from making the call. 752 if (!isa<InlineAsm>(CS.getCalledValue())) 753 Cost += InlineConstants::CallPenalty; 754 } 755 756 return Base::visitCallSite(CS); 757 } 758 759 // Otherwise we're in a very special case -- an indirect function call. See 760 // if we can be particularly clever about this. 761 Value *Callee = CS.getCalledValue(); 762 763 // First, pay the price of the argument setup. We account for the average 764 // 1 instruction per call argument setup here. 765 Cost += CS.arg_size() * InlineConstants::InstrCost; 766 767 // Next, check if this happens to be an indirect function call to a known 768 // function in this inline context. If not, we've done all we can. 769 Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); 770 if (!F) 771 return Base::visitCallSite(CS); 772 773 // If we have a constant that we are calling as a function, we can peer 774 // through it and see the function target. This happens not infrequently 775 // during devirtualization and so we want to give it a hefty bonus for 776 // inlining, but cap that bonus in the event that inlining wouldn't pan 777 // out. Pretend to inline the function, with a custom threshold. 778 CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold); 779 if (CA.analyzeCall(CS)) { 780 // We were able to inline the indirect call! Subtract the cost from the 781 // bonus we want to apply, but don't go below zero. 782 Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); 783 } 784 785 return Base::visitCallSite(CS); 786} 787 788bool CallAnalyzer::visitInstruction(Instruction &I) { 789 // Some instructions are free. All of the free intrinsics can also be 790 // handled by SROA, etc. 791 if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I)) 792 return true; 793 794 // We found something we don't understand or can't handle. Mark any SROA-able 795 // values in the operand list as no longer viable. 796 for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) 797 disableSROA(*OI); 798 799 return false; 800} 801 802 803/// \brief Analyze a basic block for its contribution to the inline cost. 804/// 805/// This method walks the analyzer over every instruction in the given basic 806/// block and accounts for their cost during inlining at this callsite. It 807/// aborts early if the threshold has been exceeded or an impossible to inline 808/// construct has been detected. It returns false if inlining is no longer 809/// viable, and true if inlining remains viable. 810bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { 811 for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); 812 I != E; ++I) { 813 ++NumInstructions; 814 if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy()) 815 ++NumVectorInstructions; 816 817 // If the instruction simplified to a constant, there is no cost to this 818 // instruction. Visit the instructions using our InstVisitor to account for 819 // all of the per-instruction logic. The visit tree returns true if we 820 // consumed the instruction in any way, and false if the instruction's base 821 // cost should count against inlining. 822 if (Base::visit(I)) 823 ++NumInstructionsSimplified; 824 else 825 Cost += InlineConstants::InstrCost; 826 827 // If the visit this instruction detected an uninlinable pattern, abort. 828 if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) 829 return false; 830 831 // If the caller is a recursive function then we don't want to inline 832 // functions which allocate a lot of stack space because it would increase 833 // the caller stack usage dramatically. 834 if (IsCallerRecursive && 835 AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) 836 return false; 837 838 if (NumVectorInstructions > NumInstructions/2) 839 VectorBonus = FiftyPercentVectorBonus; 840 else if (NumVectorInstructions > NumInstructions/10) 841 VectorBonus = TenPercentVectorBonus; 842 else 843 VectorBonus = 0; 844 845 // Check if we've past the threshold so we don't spin in huge basic 846 // blocks that will never inline. 847 if (Cost > (Threshold + VectorBonus)) 848 return false; 849 } 850 851 return true; 852} 853 854/// \brief Compute the base pointer and cumulative constant offsets for V. 855/// 856/// This strips all constant offsets off of V, leaving it the base pointer, and 857/// accumulates the total constant offset applied in the returned constant. It 858/// returns 0 if V is not a pointer, and returns the constant '0' if there are 859/// no constant offsets applied. 860ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { 861 if (!TD || !V->getType()->isPointerTy()) 862 return 0; 863 864 unsigned IntPtrWidth = TD->getPointerSizeInBits(); 865 APInt Offset = APInt::getNullValue(IntPtrWidth); 866 867 // Even though we don't look through PHI nodes, we could be called on an 868 // instruction in an unreachable block, which may be on a cycle. 869 SmallPtrSet<Value *, 4> Visited; 870 Visited.insert(V); 871 do { 872 if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { 873 if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) 874 return 0; 875 V = GEP->getPointerOperand(); 876 } else if (Operator::getOpcode(V) == Instruction::BitCast) { 877 V = cast<Operator>(V)->getOperand(0); 878 } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { 879 if (GA->mayBeOverridden()) 880 break; 881 V = GA->getAliasee(); 882 } else { 883 break; 884 } 885 assert(V->getType()->isPointerTy() && "Unexpected operand type!"); 886 } while (Visited.insert(V)); 887 888 Type *IntPtrTy = TD->getIntPtrType(V->getContext()); 889 return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); 890} 891 892/// \brief Analyze a call site for potential inlining. 893/// 894/// Returns true if inlining this call is viable, and false if it is not 895/// viable. It computes the cost and adjusts the threshold based on numerous 896/// factors and heuristics. If this method returns false but the computed cost 897/// is below the computed threshold, then inlining was forcibly disabled by 898/// some artifact of the routine. 899bool CallAnalyzer::analyzeCall(CallSite CS) { 900 ++NumCallsAnalyzed; 901 902 // Track whether the post-inlining function would have more than one basic 903 // block. A single basic block is often intended for inlining. Balloon the 904 // threshold by 50% until we pass the single-BB phase. 905 bool SingleBB = true; 906 int SingleBBBonus = Threshold / 2; 907 Threshold += SingleBBBonus; 908 909 // Perform some tweaks to the cost and threshold based on the direct 910 // callsite information. 911 912 // We want to more aggressively inline vector-dense kernels, so up the 913 // threshold, and we'll lower it if the % of vector instructions gets too 914 // low. 915 assert(NumInstructions == 0); 916 assert(NumVectorInstructions == 0); 917 FiftyPercentVectorBonus = Threshold; 918 TenPercentVectorBonus = Threshold / 2; 919 920 // Give out bonuses per argument, as the instructions setting them up will 921 // be gone after inlining. 922 for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { 923 if (TD && CS.isByValArgument(I)) { 924 // We approximate the number of loads and stores needed by dividing the 925 // size of the byval type by the target's pointer size. 926 PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); 927 unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); 928 unsigned PointerSize = TD->getPointerSizeInBits(); 929 // Ceiling division. 930 unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; 931 932 // If it generates more than 8 stores it is likely to be expanded as an 933 // inline memcpy so we take that as an upper bound. Otherwise we assume 934 // one load and one store per word copied. 935 // FIXME: The maxStoresPerMemcpy setting from the target should be used 936 // here instead of a magic number of 8, but it's not available via 937 // DataLayout. 938 NumStores = std::min(NumStores, 8U); 939 940 Cost -= 2 * NumStores * InlineConstants::InstrCost; 941 } else { 942 // For non-byval arguments subtract off one instruction per call 943 // argument. 944 Cost -= InlineConstants::InstrCost; 945 } 946 } 947 948 // If there is only one call of the function, and it has internal linkage, 949 // the cost of inlining it drops dramatically. 950 bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && 951 &F == CS.getCalledFunction(); 952 if (OnlyOneCallAndLocalLinkage) 953 Cost += InlineConstants::LastCallToStaticBonus; 954 955 // If the instruction after the call, or if the normal destination of the 956 // invoke is an unreachable instruction, the function is noreturn. As such, 957 // there is little point in inlining this unless there is literally zero 958 // cost. 959 Instruction *Instr = CS.getInstruction(); 960 if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { 961 if (isa<UnreachableInst>(II->getNormalDest()->begin())) 962 Threshold = 1; 963 } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr))) 964 Threshold = 1; 965 966 // If this function uses the coldcc calling convention, prefer not to inline 967 // it. 968 if (F.getCallingConv() == CallingConv::Cold) 969 Cost += InlineConstants::ColdccPenalty; 970 971 // Check if we're done. This can happen due to bonuses and penalties. 972 if (Cost > Threshold) 973 return false; 974 975 if (F.empty()) 976 return true; 977 978 Function *Caller = CS.getInstruction()->getParent()->getParent(); 979 // Check if the caller function is recursive itself. 980 for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); 981 U != E; ++U) { 982 CallSite Site(cast<Value>(*U)); 983 if (!Site) 984 continue; 985 Instruction *I = Site.getInstruction(); 986 if (I->getParent()->getParent() == Caller) { 987 IsCallerRecursive = true; 988 break; 989 } 990 } 991 992 // Track whether we've seen a return instruction. The first return 993 // instruction is free, as at least one will usually disappear in inlining. 994 bool HasReturn = false; 995 996 // Populate our simplified values by mapping from function arguments to call 997 // arguments with known important simplifications. 998 CallSite::arg_iterator CAI = CS.arg_begin(); 999 for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); 1000 FAI != FAE; ++FAI, ++CAI) { 1001 assert(CAI != CS.arg_end()); 1002 if (Constant *C = dyn_cast<Constant>(CAI)) 1003 SimplifiedValues[FAI] = C; 1004 1005 Value *PtrArg = *CAI; 1006 if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { 1007 ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); 1008 1009 // We can SROA any pointer arguments derived from alloca instructions. 1010 if (isa<AllocaInst>(PtrArg)) { 1011 SROAArgValues[FAI] = PtrArg; 1012 SROAArgCosts[PtrArg] = 0; 1013 } 1014 } 1015 } 1016 NumConstantArgs = SimplifiedValues.size(); 1017 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); 1018 NumAllocaArgs = SROAArgValues.size(); 1019 1020 // The worklist of live basic blocks in the callee *after* inlining. We avoid 1021 // adding basic blocks of the callee which can be proven to be dead for this 1022 // particular call site in order to get more accurate cost estimates. This 1023 // requires a somewhat heavyweight iteration pattern: we need to walk the 1024 // basic blocks in a breadth-first order as we insert live successors. To 1025 // accomplish this, prioritizing for small iterations because we exit after 1026 // crossing our threshold, we use a small-size optimized SetVector. 1027 typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>, 1028 SmallPtrSet<BasicBlock *, 16> > BBSetVector; 1029 BBSetVector BBWorklist; 1030 BBWorklist.insert(&F.getEntryBlock()); 1031 // Note that we *must not* cache the size, this loop grows the worklist. 1032 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { 1033 // Bail out the moment we cross the threshold. This means we'll under-count 1034 // the cost, but only when undercounting doesn't matter. 1035 if (Cost > (Threshold + VectorBonus)) 1036 break; 1037 1038 BasicBlock *BB = BBWorklist[Idx]; 1039 if (BB->empty()) 1040 continue; 1041 1042 // Handle the terminator cost here where we can track returns and other 1043 // function-wide constructs. 1044 TerminatorInst *TI = BB->getTerminator(); 1045 1046 // We never want to inline functions that contain an indirectbr. This is 1047 // incorrect because all the blockaddress's (in static global initializers 1048 // for example) would be referring to the original function, and this 1049 // indirect jump would jump from the inlined copy of the function into the 1050 // original function which is extremely undefined behavior. 1051 // FIXME: This logic isn't really right; we can safely inline functions 1052 // with indirectbr's as long as no other function or global references the 1053 // blockaddress of a block within the current function. And as a QOI issue, 1054 // if someone is using a blockaddress without an indirectbr, and that 1055 // reference somehow ends up in another function or global, we probably 1056 // don't want to inline this function. 1057 if (isa<IndirectBrInst>(TI)) 1058 return false; 1059 1060 if (!HasReturn && isa<ReturnInst>(TI)) 1061 HasReturn = true; 1062 else 1063 Cost += InlineConstants::InstrCost; 1064 1065 // Analyze the cost of this block. If we blow through the threshold, this 1066 // returns false, and we can bail on out. 1067 if (!analyzeBlock(BB)) { 1068 if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) 1069 return false; 1070 1071 // If the caller is a recursive function then we don't want to inline 1072 // functions which allocate a lot of stack space because it would increase 1073 // the caller stack usage dramatically. 1074 if (IsCallerRecursive && 1075 AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) 1076 return false; 1077 1078 break; 1079 } 1080 1081 // Add in the live successors by first checking whether we have terminator 1082 // that may be simplified based on the values simplified by this call. 1083 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 1084 if (BI->isConditional()) { 1085 Value *Cond = BI->getCondition(); 1086 if (ConstantInt *SimpleCond 1087 = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { 1088 BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); 1089 continue; 1090 } 1091 } 1092 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 1093 Value *Cond = SI->getCondition(); 1094 if (ConstantInt *SimpleCond 1095 = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { 1096 BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); 1097 continue; 1098 } 1099 } 1100 1101 // If we're unable to select a particular successor, just count all of 1102 // them. 1103 for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; 1104 ++TIdx) 1105 BBWorklist.insert(TI->getSuccessor(TIdx)); 1106 1107 // If we had any successors at this point, than post-inlining is likely to 1108 // have them as well. Note that we assume any basic blocks which existed 1109 // due to branches or switches which folded above will also fold after 1110 // inlining. 1111 if (SingleBB && TI->getNumSuccessors() > 1) { 1112 // Take off the bonus we applied to the threshold. 1113 Threshold -= SingleBBBonus; 1114 SingleBB = false; 1115 } 1116 } 1117 1118 // If this is a noduplicate call, we can still inline as long as 1119 // inlining this would cause the removal of the caller (so the instruction 1120 // is not actually duplicated, just moved). 1121 if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) 1122 return false; 1123 1124 Threshold += VectorBonus; 1125 1126 return Cost < Threshold; 1127} 1128 1129#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1130/// \brief Dump stats about this call's analysis. 1131void CallAnalyzer::dump() { 1132#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" 1133 DEBUG_PRINT_STAT(NumConstantArgs); 1134 DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); 1135 DEBUG_PRINT_STAT(NumAllocaArgs); 1136 DEBUG_PRINT_STAT(NumConstantPtrCmps); 1137 DEBUG_PRINT_STAT(NumConstantPtrDiffs); 1138 DEBUG_PRINT_STAT(NumInstructionsSimplified); 1139 DEBUG_PRINT_STAT(SROACostSavings); 1140 DEBUG_PRINT_STAT(SROACostSavingsLost); 1141 DEBUG_PRINT_STAT(ContainsNoDuplicateCall); 1142#undef DEBUG_PRINT_STAT 1143} 1144#endif 1145 1146INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", 1147 true, true) 1148INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) 1149INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", 1150 true, true) 1151 1152char InlineCostAnalysis::ID = 0; 1153 1154InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {} 1155 1156InlineCostAnalysis::~InlineCostAnalysis() {} 1157 1158void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 1159 AU.setPreservesAll(); 1160 AU.addRequired<TargetTransformInfo>(); 1161 CallGraphSCCPass::getAnalysisUsage(AU); 1162} 1163 1164bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { 1165 TD = getAnalysisIfAvailable<DataLayout>(); 1166 TTI = &getAnalysis<TargetTransformInfo>(); 1167 return false; 1168} 1169 1170InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { 1171 return getInlineCost(CS, CS.getCalledFunction(), Threshold); 1172} 1173 1174/// \brief Test that two functions either have or have not the given attribute 1175/// at the same time. 1176static bool attributeMatches(Function *F1, Function *F2, 1177 Attribute::AttrKind Attr) { 1178 return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr); 1179} 1180 1181/// \brief Test that there are no attribute conflicts between Caller and Callee 1182/// that prevent inlining. 1183static bool functionsHaveCompatibleAttributes(Function *Caller, 1184 Function *Callee) { 1185 return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && 1186 attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && 1187 attributeMatches(Caller, Callee, Attribute::SanitizeThread); 1188} 1189 1190InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, 1191 int Threshold) { 1192 // Cannot inline indirect calls. 1193 if (!Callee) 1194 return llvm::InlineCost::getNever(); 1195 1196 // Calls to functions with always-inline attributes should be inlined 1197 // whenever possible. 1198 if (Callee->hasFnAttribute(Attribute::AlwaysInline)) { 1199 if (isInlineViable(*Callee)) 1200 return llvm::InlineCost::getAlways(); 1201 return llvm::InlineCost::getNever(); 1202 } 1203 1204 // Never inline functions with conflicting attributes (unless callee has 1205 // always-inline attribute). 1206 if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee)) 1207 return llvm::InlineCost::getNever(); 1208 1209 // Don't inline this call if the caller has the optnone attribute. 1210 if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone)) 1211 return llvm::InlineCost::getNever(); 1212 1213 // Don't inline functions which can be redefined at link-time to mean 1214 // something else. Don't inline functions marked noinline or call sites 1215 // marked noinline. 1216 if (Callee->mayBeOverridden() || 1217 Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline()) 1218 return llvm::InlineCost::getNever(); 1219 1220 DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() 1221 << "...\n"); 1222 1223 CallAnalyzer CA(TD, *TTI, *Callee, Threshold); 1224 bool ShouldInline = CA.analyzeCall(CS); 1225 1226 DEBUG(CA.dump()); 1227 1228 // Check if there was a reason to force inlining or no inlining. 1229 if (!ShouldInline && CA.getCost() < CA.getThreshold()) 1230 return InlineCost::getNever(); 1231 if (ShouldInline && CA.getCost() >= CA.getThreshold()) 1232 return InlineCost::getAlways(); 1233 1234 return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); 1235} 1236 1237bool InlineCostAnalysis::isInlineViable(Function &F) { 1238 bool ReturnsTwice = 1239 F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, 1240 Attribute::ReturnsTwice); 1241 for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { 1242 // Disallow inlining of functions which contain an indirect branch. 1243 if (isa<IndirectBrInst>(BI->getTerminator())) 1244 return false; 1245 1246 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; 1247 ++II) { 1248 CallSite CS(II); 1249 if (!CS) 1250 continue; 1251 1252 // Disallow recursive calls. 1253 if (&F == CS.getCalledFunction()) 1254 return false; 1255 1256 // Disallow calls which expose returns-twice to a function not previously 1257 // attributed as such. 1258 if (!ReturnsTwice && CS.isCall() && 1259 cast<CallInst>(CS.getInstruction())->canReturnTwice()) 1260 return false; 1261 } 1262 } 1263 1264 return true; 1265} 1266