InlineCost.cpp revision fe45fd084db872f9c7106c26e52c1cc8c9cba3a5
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//                     The LLVM Compiler Infrastructure
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// This file is distributed under the University of Illinois Open Source
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// License. See LICENSE.TXT for details.
72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//
82a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===//
92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//
102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// This file implements inline cost analysis.
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===//
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DEBUG_TYPE "inline-cost"
15868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/Analysis/InlineCost.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/ADT/STLExtras.h"
17868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/ADT/SetVector.h"
18868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/ADT/SmallPtrSet.h"
19868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/ADT/SmallVector.h"
20868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "llvm/ADT/Statistic.h"
212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Analysis/ConstantFolding.h"
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Analysis/InstructionSimplify.h"
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Analysis/TargetTransformInfo.h"
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/IR/CallingConv.h"
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/IR/DataLayout.h"
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/IR/GlobalAlias.h"
2790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#include "llvm/IR/IntrinsicInst.h"
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/IR/Operator.h"
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/InstVisitor.h"
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/CallSite.h"
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/Debug.h"
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/GetElementPtrTypeIterator.h"
332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/Support/raw_ostream.h"
342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using namespace llvm;
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  typedef InstVisitor<CallAnalyzer, bool> Base;
432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  friend class InstVisitor<CallAnalyzer, bool>;
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // DataLayout if available, or null.
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const DataLayout *const TD;
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /// The TargetTransformInfo available for this compilation.
492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  const TargetTransformInfo &TTI;
502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // The called function.
522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Function &F;
532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  int Threshold;
552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  int Cost;
562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool IsCallerRecursive;
582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool IsRecursiveCall;
592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool ExposesReturnsTwice;
602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool HasDynamicAlloca;
612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool ContainsNoDuplicateCall;
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /// Number of bytes allocated statically by the callee.
642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  uint64_t AllocatedSize;
652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  unsigned NumInstructions, NumVectorInstructions;
662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  int FiftyPercentVectorBonus, TenPercentVectorBonus;
672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  int VectorBonus;
682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // While we walk the potentially-inlined instructions, we build up and
702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // maintain a mapping of simplified values specific to this callsite. The
712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // idea is to propagate any special information we have about arguments to
722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // this call through the inlinable section of the function, and account for
732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // likely simplifications post-inlining. The most important aspect we track
742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // is CFG altering simplifications -- when we prove a basic block dead, that
752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // can cause dramatic shifts in the cost of inlining a function.
762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DenseMap<Value *, Constant *> SimplifiedValues;
772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Keep track of the values which map back (through function arguments) to
792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // allocas on the caller stack which could be simplified through SROA.
802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DenseMap<Value *, Value *> SROAArgValues;
812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // The mapping of caller Alloca values to their accumulated cost savings. If
832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // we have to disable SROA for one of the allocas, this tells us how much
842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // cost must be added.
852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DenseMap<Value *, int> SROAArgCosts;
862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Keep track of values which map to a pointer base and constant offset.
882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Custom simplification helper routines.
912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool isAllocaDerivedArg(Value *V);
922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                            DenseMap<Value *, int>::iterator &CostIt);
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  void disableSROA(Value *V);
962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                          int InstructionCost);
982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool handleSROACandidate(bool IsSROAValid,
992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                           DenseMap<Value *, int>::iterator CostIt,
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                           int InstructionCost);
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool isGEPOffsetConstant(GetElementPtrInst &GEP);
1022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
1032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool simplifyCallSite(Function *F, CallSite CS);
1042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Custom analysis routines.
1072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool analyzeBlock(BasicBlock *BB);
1082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Disable several entry points to the visitor so we don't accidentally use
1102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // them by declaring but not defining them here.
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void visit(Module *);     void visit(Module &);
1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  void visit(Function *);   void visit(Function &);
1132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  void visit(BasicBlock *); void visit(BasicBlock &);
1142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Provide base case for our instruction visit.
1162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool visitInstruction(Instruction &I);
1172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Our visit overrides.
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool visitAlloca(AllocaInst &I);
1202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool visitPHI(PHINode &I);
1212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool visitGetElementPtr(GetElementPtrInst &I);
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool visitBitCast(BitCastInst &I);
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool visitPtrToInt(PtrToIntInst &I);
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool visitIntToPtr(IntToPtrInst &I);
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool visitCastInst(CastInst &I);
1262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool visitUnaryInstruction(UnaryInstruction &I);
1272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool visitCmpInst(CmpInst &I);
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool visitSub(BinaryOperator &I);
129b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)  bool visitBinaryOperator(BinaryOperator &I);
1302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool visitLoad(LoadInst &I);
1312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool visitStore(StoreInst &I);
1322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool visitExtractValue(ExtractValueInst &I);
1332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool visitInsertValue(InsertValueInst &I);
1342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool visitCallSite(CallSite CS);
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)public:
1372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
1382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)               Function &Callee, int Threshold)
1392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
1402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        IsCallerRecursive(false), IsRecursiveCall(false),
1412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        ExposesReturnsTwice(false), HasDynamicAlloca(false),
1422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
1432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        NumVectorInstructions(0), FiftyPercentVectorBonus(0),
1442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
1452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
1462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
1472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        SROACostSavings(0), SROACostSavingsLost(0) {}
1482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  bool analyzeCall(CallSite CS);
1502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
151b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)  int getThreshold() { return Threshold; }
1522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  int getCost() { return Cost; }
1532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
154c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Keep a bunch of stats about the cost savings found so we can print them
155c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // out when debugging.
156c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  unsigned NumConstantArgs;
1572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  unsigned NumConstantOffsetPtrArgs;
1582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  unsigned NumAllocaArgs;
159b2df76ea8fec9e32f6f3718986dba0d95315b29cTorne (Richard Coles)  unsigned NumConstantPtrCmps;
160c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  unsigned NumConstantPtrDiffs;
161c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  unsigned NumInstructionsSimplified;
162c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  unsigned SROACostSavings;
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unsigned SROACostSavingsLost;
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  void dump();
1662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)};
167c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
1682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} // namespace
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// \brief Test whether the given value is an Alloca-derived function argument.
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return SROAArgValues.count(V);
1732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
1742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
1762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// Returns false if V does not map to a SROA-candidate.
1772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::lookupSROAArgAndCost(
1782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
1792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (SROAArgValues.empty() || SROAArgCosts.empty())
1802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return false;
1812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
1832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (ArgIt == SROAArgValues.end())
1842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return false;
1852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Arg = ArgIt->second;
1872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  CostIt = SROAArgCosts.find(Arg);
1882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return CostIt != SROAArgCosts.end();
1892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
1902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Disable SROA for the candidate marked by this cost iterator.
1922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)///
1932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// This marks the candidate as no longer viable for SROA, and adds the cost
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// savings associated with it back into the inline cost measurement.
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
1962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // If we're no longer able to perform SROA we need to undo its cost savings
1972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // and prevent subsequent analysis.
1982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Cost += CostIt->second;
1992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  SROACostSavings -= CostIt->second;
2002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  SROACostSavingsLost += CostIt->second;
2012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  SROAArgCosts.erase(CostIt);
2022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
2032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
2052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void CallAnalyzer::disableSROA(Value *V) {
2062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *SROAArg;
2072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DenseMap<Value *, int>::iterator CostIt;
2082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
2092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    disableSROA(CostIt);
2102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
2112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Accumulate the given cost for a particular SROA candidate.
2132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
2142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                      int InstructionCost) {
2152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  CostIt->second += InstructionCost;
2162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  SROACostSavings += InstructionCost;
2172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
2182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Helper for the common pattern of handling a SROA candidate.
2202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// Either accumulates the cost savings if the SROA remains valid, or disables
2212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// SROA for the candidate.
2222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
2232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                       DenseMap<Value *, int>::iterator CostIt,
2242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                       int InstructionCost) {
2252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (IsSROAValid) {
2262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    accumulateSROACost(CostIt, InstructionCost);
2272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return true;
2282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
2292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  disableSROA(CostIt);
2312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return false;
2322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
2332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Check whether a GEP's indices are all constant.
2352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)///
2362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// Respects any simplified values known during the analysis of this callsite.
237868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
238868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
2392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
2402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return false;
2412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return true;
2432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
2442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Accumulate a constant GEP offset into an APInt if possible.
2462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)///
2472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// Returns false if unable to compute the offset for any reason. Respects any
2482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// simplified values known during the analysis of this callsite.
2492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
2502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!TD)
2512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return false;
2522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unsigned IntPtrWidth = TD->getPointerSizeInBits();
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert(IntPtrWidth == Offset.getBitWidth());
255c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
256c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
257c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       GTI != GTE; ++GTI) {
258eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
259c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (!OpC)
260c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
261c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)        OpC = dyn_cast<ConstantInt>(SimpleOp);
262c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (!OpC)
263c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      return false;
264c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (OpC->isZero()) continue;
265c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
266c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // Handle a struct index, which adds its field offset to the pointer.
267c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
268c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      unsigned ElementIdx = OpC->getZExtValue();
269c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      const StructLayout *SL = TD->getStructLayout(STy);
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      continue;
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
2762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
2772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return true;
2782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
2792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitAlloca(AllocaInst &I) {
2812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // FIXME: Check whether inlining will turn a dynamic alloca into a static
2822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // alloca, and handle that case.
2832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Accumulate the allocated size.
2852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (I.isStaticAlloca()) {
2862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    Type *Ty = I.getAllocatedType();
2872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) :
2882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                      Ty->getPrimitiveSizeInBits());
2897dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  }
2902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // We will happily inline static alloca instructions.
2922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (I.isStaticAlloca())
2932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return Base::visitAlloca(I);
2947dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch
2957dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
296c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // a variety of reasons, and so we would like to not inline them into
2972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // functions which don't currently have a dynamic alloca. This simply
2982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // disables inlining altogether in the presence of a dynamic alloca.
2992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  HasDynamicAlloca = true;
3002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return false;
3012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
3022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitPHI(PHINode &I) {
3042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // FIXME: We should potentially be tracking values through phi nodes,
3052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // especially when they collapse to a single value due to deleted CFG edges
3062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // during inlining.
3077dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch
3087dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // though we don't want to propagate it's bonuses. The idea is to disable
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // SROA if it *might* be used in an inappropriate manner.
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Phi nodes are always zero-cost.
3132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return true;
3142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
3152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
3172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *SROAArg;
3182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DenseMap<Value *, int>::iterator CostIt;
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
3202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                            SROAArg, CostIt);
3212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Try to fold GEPs of constant-offset call site argument pointers. This
3232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // requires target data and inbounds GEPs.
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (TD && I.isInBounds()) {
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Check if we have a base + offset for the pointer.
3262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    Value *Ptr = I.getPointerOperand();
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BaseAndOffset.first) {
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Check if the offset of this GEP is constant, and if so accumulate it
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // into Offset.
3312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
3322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        // Non-constant GEPs aren't folded, and disable SROA.
3332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        if (SROACandidate)
3342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          disableSROA(CostIt);
3352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        return false;
3362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      }
3372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // Add the result as a new mapping to Base + Offset.
3392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      ConstantOffsetPtrs[&I] = BaseAndOffset;
3402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // Also handle SROA candidates here, we already know that the GEP is
3422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // all-constant indexed.
3432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (SROACandidate)
3442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        SROAArgValues[&I] = SROAArg;
3452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return true;
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (isGEPOffsetConstant(I)) {
3512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (SROACandidate)
3522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      SROAArgValues[&I] = SROAArg;
3532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Constant GEPs are modeled as free.
3552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return true;
3562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
3572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Variable GEPs will require math and will disable SROA.
3592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (SROACandidate)
3602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    disableSROA(CostIt);
3612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return false;
3622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
3632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitBitCast(BitCastInst &I) {
3652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Propagate constants through bitcasts.
3662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
3672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!COp)
3682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    COp = SimplifiedValues.lookup(I.getOperand(0));
3692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (COp)
3702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
3712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      SimplifiedValues[&I] = C;
37290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      return true;
37390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    }
37490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Track base/offsets through casts
3762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  std::pair<Value *, APInt> BaseAndOffset
3772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    = ConstantOffsetPtrs.lookup(I.getOperand(0));
3782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Casts don't change the offset, just wrap it up.
3792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (BaseAndOffset.first)
3802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    ConstantOffsetPtrs[&I] = BaseAndOffset;
3812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Also look for SROA candidates here.
3832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *SROAArg;
3842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DenseMap<Value *, int>::iterator CostIt;
3852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
3862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    SROAArgValues[&I] = SROAArg;
3872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Bitcasts are always zero cost.
3892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return true;
3902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
3912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
3932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Propagate constants through ptrtoint.
3942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
3952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!COp)
3962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    COp = SimplifiedValues.lookup(I.getOperand(0));
3972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (COp)
3982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
3992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      SimplifiedValues[&I] = C;
4002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return true;
4012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
4022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Track base/offset pairs when converted to a plain integer provided the
4042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // integer is large enough to represent the pointer.
4052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
4062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
407868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)    std::pair<Value *, APInt> BaseAndOffset
408868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      = ConstantOffsetPtrs.lookup(I.getOperand(0));
4092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (BaseAndOffset.first)
4102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      ConstantOffsetPtrs[&I] = BaseAndOffset;
4112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
4122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // This is really weird. Technically, ptrtoint will disable SROA. However,
4142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // unless that ptrtoint is *used* somewhere in the live basic blocks after
4152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // inlining, it will be nuked, and SROA should proceed. All of the uses which
4162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // would block SROA would also block SROA if applied directly to a pointer,
4172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // and so we can just add the integer in here. The only places where SROA is
4182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // preserved either cannot fire on an integer, or won't in-and-of themselves
4192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // disable SROA (ext) w/o some later use that we would see and disable.
4202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *SROAArg;
4212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DenseMap<Value *, int>::iterator CostIt;
4222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
4232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    SROAArgValues[&I] = SROAArg;
4242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
4262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
4272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
4292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Propagate constants through ptrtoint.
4302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
4312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!COp)
4322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    COp = SimplifiedValues.lookup(I.getOperand(0));
4332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (COp)
4342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
4352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      SimplifiedValues[&I] = C;
4362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return true;
4372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
4382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Track base/offset pairs when round-tripped through a pointer without
4407dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  // modifications provided the integer is not too large.
4412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *Op = I.getOperand(0);
4427dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
4432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
4442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
4452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (BaseAndOffset.first)
4462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      ConstantOffsetPtrs[&I] = BaseAndOffset;
4472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
4482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
4507dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch  Value *SROAArg;
4512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DenseMap<Value *, int>::iterator CostIt;
4522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
4532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    SROAArgValues[&I] = SROAArg;
4542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
4562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
4572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitCastInst(CastInst &I) {
4592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Propagate constants through ptrtoint.
4602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
4612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!COp)
4622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    COp = SimplifiedValues.lookup(I.getOperand(0));
4632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (COp)
4642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
4652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      SimplifiedValues[&I] = C;
4662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return true;
4672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
4682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
4702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  disableSROA(I.getOperand(0));
4712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
4732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
4742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
4762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *Operand = I.getOperand(0);
4772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Constant *COp = dyn_cast<Constant>(Operand);
4782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!COp)
4792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    COp = SimplifiedValues.lookup(Operand);
4802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (COp)
4812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
4822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                               COp, TD)) {
4832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      SimplifiedValues[&I] = C;
4842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return true;
4852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
4862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Disable any SROA on the argument to arbitrary unary operators.
4882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  disableSROA(Operand);
4892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return false;
4912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
4922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitCmpInst(CmpInst &I) {
4942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
4952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // First try to handle simplified comparisons.
4962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!isa<Constant>(LHS))
4972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
4982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      LHS = SimpleLHS;
4992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!isa<Constant>(RHS))
5002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
5012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      RHS = SimpleRHS;
5022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
5032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Constant *CRHS = dyn_cast<Constant>(RHS))
5042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
5052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        SimplifiedValues[&I] = C;
5062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        return true;
5072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      }
5082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
5092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
5102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (I.getOpcode() == Instruction::FCmp)
5112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return false;
5122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
5132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Otherwise look for a comparison between constant offset pointers with
5142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // a common base.
5152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *LHSBase, *RHSBase;
5162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  APInt LHSOffset, RHSOffset;
5172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
5182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (LHSBase) {
5192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
5202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (RHSBase && LHSBase == RHSBase) {
5212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // We have common bases, fold the icmp to a constant based on the
5222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // offsets.
5232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
5242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
5252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SimplifiedValues[&I] = C;
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++NumConstantPtrCmps;
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return true;
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
5312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
5322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
5332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // If the comparison is an equality comparison with null, we can simplify it
5342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // for any alloca-derived argument.
5352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
5362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (isAllocaDerivedArg(I.getOperand(0))) {
5372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // We can actually predict the result of comparisons between an
5382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // alloca-derived value and null. Note that this fires regardless of
5392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // SROA firing.
5402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
5412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
5422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                        : ConstantInt::getFalse(I.getType());
5432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return true;
5442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
5452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
5462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Finally check for SROA candidates in comparisons.
5472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *SROAArg;
5482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DenseMap<Value *, int>::iterator CostIt;
5492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
5502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (isa<ConstantPointerNull>(I.getOperand(1))) {
5512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      accumulateSROACost(CostIt, InlineConstants::InstrCost);
5522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return true;
5532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
5542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
5552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    disableSROA(CostIt);
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return false;
5592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
560eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool CallAnalyzer::visitSub(BinaryOperator &I) {
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Try to handle a special case: we can fold computing the difference of two
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // constant-related pointers.
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Value *LHSBase, *RHSBase;
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  APInt LHSOffset, RHSOffset;
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (LHSBase) {
5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (RHSBase && LHSBase == RHSBase) {
5712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // We have common bases, fold the subtract to a constant based on the
5722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // offsets.
5732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
5742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
5752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SimplifiedValues[&I] = C;
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++NumConstantPtrDiffs;
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return true;
5792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      }
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Otherwise, fall back to the generic logic for simplifying and handling
5842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // instructions.
5852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return Base::visitSub(I);
5862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
5872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
5882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
5892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
5902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!isa<Constant>(LHS))
5912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
5922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      LHS = SimpleLHS;
5932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!isa<Constant>(RHS))
5942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
5952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      RHS = SimpleRHS;
5962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD);
5972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
5982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    SimplifiedValues[&I] = C;
5992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return true;
6002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
6032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  disableSROA(LHS);
6042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  disableSROA(RHS);
60590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
60690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  return false;
60790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)}
6082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitLoad(LoadInst &I) {
6102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Value *SROAArg;
6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DenseMap<Value *, int>::iterator CostIt;
6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (I.isSimple()) {
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      accumulateSROACost(CostIt, InlineConstants::InstrCost);
6152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return true;
6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    disableSROA(CostIt);
6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return false;
6222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
6232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool CallAnalyzer::visitStore(StoreInst &I) {
6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Value *SROAArg;
6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DenseMap<Value *, int>::iterator CostIt;
6272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
6282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (I.isSimple()) {
6292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      accumulateSROACost(CostIt, InlineConstants::InstrCost);
6302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return true;
6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    disableSROA(CostIt);
6342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return false;
6372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Constant folding for extract value is trivial.
6412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
6422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!C)
6432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    C = SimplifiedValues.lookup(I.getAggregateOperand());
6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (C) {
6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
6462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return true;
6472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
6482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // SROA can look through these but give them a cost.
6505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return false;
6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
6542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Constant folding for insert value is trivial.
6555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!AggC)
6572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    AggC = SimplifiedValues.lookup(I.getAggregateOperand());
6582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
6592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!InsertedC)
6602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
6612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (AggC && InsertedC) {
6622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                                        I.getIndices());
6642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return true;
6652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // SROA can look through these but give them a cost.
6682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return false;
6692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
6702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief Try to simplify a call site.
6722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)///
6732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// Takes a concrete function and callsite and tries to actually simplify it by
6742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// analyzing the arguments and call itself with instsimplify. Returns true if
6752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// it has simplified the callsite to some other entity (a constant), making it
6762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// free.
6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // FIXME: Using the instsimplify logic directly for this is inefficient
6792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // because we have to continually rebuild the argument list even when no
6802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // simplifications can be performed. Until that is fixed with remapping
6812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // inside of instsimplify, directly constant fold calls here.
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!canConstantFoldCallTo(F))
6832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return false;
6842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Try to re-map the arguments to constants.
6862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  SmallVector<Constant *, 4> ConstantArgs;
6872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  ConstantArgs.reserve(CS.arg_size());
6882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
6892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)       I != E; ++I) {
6902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    Constant *C = dyn_cast<Constant>(*I);
6912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (!C)
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
6932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (!C)
6942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return false; // This argument doesn't map to a constant.
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ConstantArgs.push_back(C);
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
6992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    SimplifiedValues[CS.getInstruction()] = C;
7002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return true;
7012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return false;
7042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
706bool CallAnalyzer::visitCallSite(CallSite CS) {
707  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
708      !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
709                                      Attribute::ReturnsTwice)) {
710    // This aborts the entire analysis.
711    ExposesReturnsTwice = true;
712    return false;
713  }
714  if (CS.isCall() &&
715      cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate))
716    ContainsNoDuplicateCall = true;
717
718  if (Function *F = CS.getCalledFunction()) {
719    // When we have a concrete function, first try to simplify it directly.
720    if (simplifyCallSite(F, CS))
721      return true;
722
723    // Next check if it is an intrinsic we know about.
724    // FIXME: Lift this into part of the InstVisitor.
725    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
726      switch (II->getIntrinsicID()) {
727      default:
728        return Base::visitCallSite(CS);
729
730      case Intrinsic::memset:
731      case Intrinsic::memcpy:
732      case Intrinsic::memmove:
733        // SROA can usually chew through these intrinsics, but they aren't free.
734        return false;
735      }
736    }
737
738    if (F == CS.getInstruction()->getParent()->getParent()) {
739      // This flag will fully abort the analysis, so don't bother with anything
740      // else.
741      IsRecursiveCall = true;
742      return false;
743    }
744
745    if (TTI.isLoweredToCall(F)) {
746      // We account for the average 1 instruction per call argument setup
747      // here.
748      Cost += CS.arg_size() * InlineConstants::InstrCost;
749
750      // Everything other than inline ASM will also have a significant cost
751      // merely from making the call.
752      if (!isa<InlineAsm>(CS.getCalledValue()))
753        Cost += InlineConstants::CallPenalty;
754    }
755
756    return Base::visitCallSite(CS);
757  }
758
759  // Otherwise we're in a very special case -- an indirect function call. See
760  // if we can be particularly clever about this.
761  Value *Callee = CS.getCalledValue();
762
763  // First, pay the price of the argument setup. We account for the average
764  // 1 instruction per call argument setup here.
765  Cost += CS.arg_size() * InlineConstants::InstrCost;
766
767  // Next, check if this happens to be an indirect function call to a known
768  // function in this inline context. If not, we've done all we can.
769  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
770  if (!F)
771    return Base::visitCallSite(CS);
772
773  // If we have a constant that we are calling as a function, we can peer
774  // through it and see the function target. This happens not infrequently
775  // during devirtualization and so we want to give it a hefty bonus for
776  // inlining, but cap that bonus in the event that inlining wouldn't pan
777  // out. Pretend to inline the function, with a custom threshold.
778  CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold);
779  if (CA.analyzeCall(CS)) {
780    // We were able to inline the indirect call! Subtract the cost from the
781    // bonus we want to apply, but don't go below zero.
782    Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
783  }
784
785  return Base::visitCallSite(CS);
786}
787
788bool CallAnalyzer::visitInstruction(Instruction &I) {
789  // Some instructions are free. All of the free intrinsics can also be
790  // handled by SROA, etc.
791  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
792    return true;
793
794  // We found something we don't understand or can't handle. Mark any SROA-able
795  // values in the operand list as no longer viable.
796  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
797    disableSROA(*OI);
798
799  return false;
800}
801
802
803/// \brief Analyze a basic block for its contribution to the inline cost.
804///
805/// This method walks the analyzer over every instruction in the given basic
806/// block and accounts for their cost during inlining at this callsite. It
807/// aborts early if the threshold has been exceeded or an impossible to inline
808/// construct has been detected. It returns false if inlining is no longer
809/// viable, and true if inlining remains viable.
810bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
811  for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
812       I != E; ++I) {
813    ++NumInstructions;
814    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
815      ++NumVectorInstructions;
816
817    // If the instruction simplified to a constant, there is no cost to this
818    // instruction. Visit the instructions using our InstVisitor to account for
819    // all of the per-instruction logic. The visit tree returns true if we
820    // consumed the instruction in any way, and false if the instruction's base
821    // cost should count against inlining.
822    if (Base::visit(I))
823      ++NumInstructionsSimplified;
824    else
825      Cost += InlineConstants::InstrCost;
826
827    // If the visit this instruction detected an uninlinable pattern, abort.
828    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
829      return false;
830
831    // If the caller is a recursive function then we don't want to inline
832    // functions which allocate a lot of stack space because it would increase
833    // the caller stack usage dramatically.
834    if (IsCallerRecursive &&
835        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
836      return false;
837
838    if (NumVectorInstructions > NumInstructions/2)
839      VectorBonus = FiftyPercentVectorBonus;
840    else if (NumVectorInstructions > NumInstructions/10)
841      VectorBonus = TenPercentVectorBonus;
842    else
843      VectorBonus = 0;
844
845    // Check if we've past the threshold so we don't spin in huge basic
846    // blocks that will never inline.
847    if (Cost > (Threshold + VectorBonus))
848      return false;
849  }
850
851  return true;
852}
853
854/// \brief Compute the base pointer and cumulative constant offsets for V.
855///
856/// This strips all constant offsets off of V, leaving it the base pointer, and
857/// accumulates the total constant offset applied in the returned constant. It
858/// returns 0 if V is not a pointer, and returns the constant '0' if there are
859/// no constant offsets applied.
860ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
861  if (!TD || !V->getType()->isPointerTy())
862    return 0;
863
864  unsigned IntPtrWidth = TD->getPointerSizeInBits();
865  APInt Offset = APInt::getNullValue(IntPtrWidth);
866
867  // Even though we don't look through PHI nodes, we could be called on an
868  // instruction in an unreachable block, which may be on a cycle.
869  SmallPtrSet<Value *, 4> Visited;
870  Visited.insert(V);
871  do {
872    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
873      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
874        return 0;
875      V = GEP->getPointerOperand();
876    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
877      V = cast<Operator>(V)->getOperand(0);
878    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
879      if (GA->mayBeOverridden())
880        break;
881      V = GA->getAliasee();
882    } else {
883      break;
884    }
885    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
886  } while (Visited.insert(V));
887
888  Type *IntPtrTy = TD->getIntPtrType(V->getContext());
889  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
890}
891
892/// \brief Analyze a call site for potential inlining.
893///
894/// Returns true if inlining this call is viable, and false if it is not
895/// viable. It computes the cost and adjusts the threshold based on numerous
896/// factors and heuristics. If this method returns false but the computed cost
897/// is below the computed threshold, then inlining was forcibly disabled by
898/// some artifact of the routine.
899bool CallAnalyzer::analyzeCall(CallSite CS) {
900  ++NumCallsAnalyzed;
901
902  // Track whether the post-inlining function would have more than one basic
903  // block. A single basic block is often intended for inlining. Balloon the
904  // threshold by 50% until we pass the single-BB phase.
905  bool SingleBB = true;
906  int SingleBBBonus = Threshold / 2;
907  Threshold += SingleBBBonus;
908
909  // Perform some tweaks to the cost and threshold based on the direct
910  // callsite information.
911
912  // We want to more aggressively inline vector-dense kernels, so up the
913  // threshold, and we'll lower it if the % of vector instructions gets too
914  // low.
915  assert(NumInstructions == 0);
916  assert(NumVectorInstructions == 0);
917  FiftyPercentVectorBonus = Threshold;
918  TenPercentVectorBonus = Threshold / 2;
919
920  // Give out bonuses per argument, as the instructions setting them up will
921  // be gone after inlining.
922  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
923    if (TD && CS.isByValArgument(I)) {
924      // We approximate the number of loads and stores needed by dividing the
925      // size of the byval type by the target's pointer size.
926      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
927      unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
928      unsigned PointerSize = TD->getPointerSizeInBits();
929      // Ceiling division.
930      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
931
932      // If it generates more than 8 stores it is likely to be expanded as an
933      // inline memcpy so we take that as an upper bound. Otherwise we assume
934      // one load and one store per word copied.
935      // FIXME: The maxStoresPerMemcpy setting from the target should be used
936      // here instead of a magic number of 8, but it's not available via
937      // DataLayout.
938      NumStores = std::min(NumStores, 8U);
939
940      Cost -= 2 * NumStores * InlineConstants::InstrCost;
941    } else {
942      // For non-byval arguments subtract off one instruction per call
943      // argument.
944      Cost -= InlineConstants::InstrCost;
945    }
946  }
947
948  // If there is only one call of the function, and it has internal linkage,
949  // the cost of inlining it drops dramatically.
950  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
951    &F == CS.getCalledFunction();
952  if (OnlyOneCallAndLocalLinkage)
953    Cost += InlineConstants::LastCallToStaticBonus;
954
955  // If the instruction after the call, or if the normal destination of the
956  // invoke is an unreachable instruction, the function is noreturn. As such,
957  // there is little point in inlining this unless there is literally zero
958  // cost.
959  Instruction *Instr = CS.getInstruction();
960  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
961    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
962      Threshold = 1;
963  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
964    Threshold = 1;
965
966  // If this function uses the coldcc calling convention, prefer not to inline
967  // it.
968  if (F.getCallingConv() == CallingConv::Cold)
969    Cost += InlineConstants::ColdccPenalty;
970
971  // Check if we're done. This can happen due to bonuses and penalties.
972  if (Cost > Threshold)
973    return false;
974
975  if (F.empty())
976    return true;
977
978  Function *Caller = CS.getInstruction()->getParent()->getParent();
979  // Check if the caller function is recursive itself.
980  for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
981       U != E; ++U) {
982    CallSite Site(cast<Value>(*U));
983    if (!Site)
984      continue;
985    Instruction *I = Site.getInstruction();
986    if (I->getParent()->getParent() == Caller) {
987      IsCallerRecursive = true;
988      break;
989    }
990  }
991
992  // Track whether we've seen a return instruction. The first return
993  // instruction is free, as at least one will usually disappear in inlining.
994  bool HasReturn = false;
995
996  // Populate our simplified values by mapping from function arguments to call
997  // arguments with known important simplifications.
998  CallSite::arg_iterator CAI = CS.arg_begin();
999  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1000       FAI != FAE; ++FAI, ++CAI) {
1001    assert(CAI != CS.arg_end());
1002    if (Constant *C = dyn_cast<Constant>(CAI))
1003      SimplifiedValues[FAI] = C;
1004
1005    Value *PtrArg = *CAI;
1006    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1007      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
1008
1009      // We can SROA any pointer arguments derived from alloca instructions.
1010      if (isa<AllocaInst>(PtrArg)) {
1011        SROAArgValues[FAI] = PtrArg;
1012        SROAArgCosts[PtrArg] = 0;
1013      }
1014    }
1015  }
1016  NumConstantArgs = SimplifiedValues.size();
1017  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1018  NumAllocaArgs = SROAArgValues.size();
1019
1020  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1021  // adding basic blocks of the callee which can be proven to be dead for this
1022  // particular call site in order to get more accurate cost estimates. This
1023  // requires a somewhat heavyweight iteration pattern: we need to walk the
1024  // basic blocks in a breadth-first order as we insert live successors. To
1025  // accomplish this, prioritizing for small iterations because we exit after
1026  // crossing our threshold, we use a small-size optimized SetVector.
1027  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1028                                  SmallPtrSet<BasicBlock *, 16> > BBSetVector;
1029  BBSetVector BBWorklist;
1030  BBWorklist.insert(&F.getEntryBlock());
1031  // Note that we *must not* cache the size, this loop grows the worklist.
1032  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1033    // Bail out the moment we cross the threshold. This means we'll under-count
1034    // the cost, but only when undercounting doesn't matter.
1035    if (Cost > (Threshold + VectorBonus))
1036      break;
1037
1038    BasicBlock *BB = BBWorklist[Idx];
1039    if (BB->empty())
1040      continue;
1041
1042    // Handle the terminator cost here where we can track returns and other
1043    // function-wide constructs.
1044    TerminatorInst *TI = BB->getTerminator();
1045
1046    // We never want to inline functions that contain an indirectbr.  This is
1047    // incorrect because all the blockaddress's (in static global initializers
1048    // for example) would be referring to the original function, and this
1049    // indirect jump would jump from the inlined copy of the function into the
1050    // original function which is extremely undefined behavior.
1051    // FIXME: This logic isn't really right; we can safely inline functions
1052    // with indirectbr's as long as no other function or global references the
1053    // blockaddress of a block within the current function.  And as a QOI issue,
1054    // if someone is using a blockaddress without an indirectbr, and that
1055    // reference somehow ends up in another function or global, we probably
1056    // don't want to inline this function.
1057    if (isa<IndirectBrInst>(TI))
1058      return false;
1059
1060    if (!HasReturn && isa<ReturnInst>(TI))
1061      HasReturn = true;
1062    else
1063      Cost += InlineConstants::InstrCost;
1064
1065    // Analyze the cost of this block. If we blow through the threshold, this
1066    // returns false, and we can bail on out.
1067    if (!analyzeBlock(BB)) {
1068      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
1069        return false;
1070
1071      // If the caller is a recursive function then we don't want to inline
1072      // functions which allocate a lot of stack space because it would increase
1073      // the caller stack usage dramatically.
1074      if (IsCallerRecursive &&
1075          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
1076        return false;
1077
1078      break;
1079    }
1080
1081    // Add in the live successors by first checking whether we have terminator
1082    // that may be simplified based on the values simplified by this call.
1083    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1084      if (BI->isConditional()) {
1085        Value *Cond = BI->getCondition();
1086        if (ConstantInt *SimpleCond
1087              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1088          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
1089          continue;
1090        }
1091      }
1092    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1093      Value *Cond = SI->getCondition();
1094      if (ConstantInt *SimpleCond
1095            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1096        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
1097        continue;
1098      }
1099    }
1100
1101    // If we're unable to select a particular successor, just count all of
1102    // them.
1103    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1104         ++TIdx)
1105      BBWorklist.insert(TI->getSuccessor(TIdx));
1106
1107    // If we had any successors at this point, than post-inlining is likely to
1108    // have them as well. Note that we assume any basic blocks which existed
1109    // due to branches or switches which folded above will also fold after
1110    // inlining.
1111    if (SingleBB && TI->getNumSuccessors() > 1) {
1112      // Take off the bonus we applied to the threshold.
1113      Threshold -= SingleBBBonus;
1114      SingleBB = false;
1115    }
1116  }
1117
1118  // If this is a noduplicate call, we can still inline as long as
1119  // inlining this would cause the removal of the caller (so the instruction
1120  // is not actually duplicated, just moved).
1121  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1122    return false;
1123
1124  Threshold += VectorBonus;
1125
1126  return Cost < Threshold;
1127}
1128
1129#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1130/// \brief Dump stats about this call's analysis.
1131void CallAnalyzer::dump() {
1132#define DEBUG_PRINT_STAT(x) llvm::dbgs() << "      " #x ": " << x << "\n"
1133  DEBUG_PRINT_STAT(NumConstantArgs);
1134  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1135  DEBUG_PRINT_STAT(NumAllocaArgs);
1136  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1137  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1138  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1139  DEBUG_PRINT_STAT(SROACostSavings);
1140  DEBUG_PRINT_STAT(SROACostSavingsLost);
1141  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1142#undef DEBUG_PRINT_STAT
1143}
1144#endif
1145
1146INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
1147                      true, true)
1148INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
1149INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
1150                    true, true)
1151
1152char InlineCostAnalysis::ID = 0;
1153
1154InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {}
1155
1156InlineCostAnalysis::~InlineCostAnalysis() {}
1157
1158void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
1159  AU.setPreservesAll();
1160  AU.addRequired<TargetTransformInfo>();
1161  CallGraphSCCPass::getAnalysisUsage(AU);
1162}
1163
1164bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
1165  TD = getAnalysisIfAvailable<DataLayout>();
1166  TTI = &getAnalysis<TargetTransformInfo>();
1167  return false;
1168}
1169
1170InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
1171  return getInlineCost(CS, CS.getCalledFunction(), Threshold);
1172}
1173
1174/// \brief Test that two functions either have or have not the given attribute
1175///        at the same time.
1176static bool attributeMatches(Function *F1, Function *F2,
1177                             Attribute::AttrKind Attr) {
1178  return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
1179}
1180
1181/// \brief Test that there are no attribute conflicts between Caller and Callee
1182///        that prevent inlining.
1183static bool functionsHaveCompatibleAttributes(Function *Caller,
1184                                              Function *Callee) {
1185  return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
1186         attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
1187         attributeMatches(Caller, Callee, Attribute::SanitizeThread);
1188}
1189
1190InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
1191                                             int Threshold) {
1192  // Cannot inline indirect calls.
1193  if (!Callee)
1194    return llvm::InlineCost::getNever();
1195
1196  // Calls to functions with always-inline attributes should be inlined
1197  // whenever possible.
1198  if (Callee->hasFnAttribute(Attribute::AlwaysInline)) {
1199    if (isInlineViable(*Callee))
1200      return llvm::InlineCost::getAlways();
1201    return llvm::InlineCost::getNever();
1202  }
1203
1204  // Never inline functions with conflicting attributes (unless callee has
1205  // always-inline attribute).
1206  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
1207    return llvm::InlineCost::getNever();
1208
1209  // Don't inline this call if the caller has the optnone attribute.
1210  if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
1211    return llvm::InlineCost::getNever();
1212
1213  // Don't inline functions which can be redefined at link-time to mean
1214  // something else.  Don't inline functions marked noinline or call sites
1215  // marked noinline.
1216  if (Callee->mayBeOverridden() ||
1217      Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
1218    return llvm::InlineCost::getNever();
1219
1220  DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
1221        << "...\n");
1222
1223  CallAnalyzer CA(TD, *TTI, *Callee, Threshold);
1224  bool ShouldInline = CA.analyzeCall(CS);
1225
1226  DEBUG(CA.dump());
1227
1228  // Check if there was a reason to force inlining or no inlining.
1229  if (!ShouldInline && CA.getCost() < CA.getThreshold())
1230    return InlineCost::getNever();
1231  if (ShouldInline && CA.getCost() >= CA.getThreshold())
1232    return InlineCost::getAlways();
1233
1234  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
1235}
1236
1237bool InlineCostAnalysis::isInlineViable(Function &F) {
1238  bool ReturnsTwice =
1239    F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
1240                                   Attribute::ReturnsTwice);
1241  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
1242    // Disallow inlining of functions which contain an indirect branch.
1243    if (isa<IndirectBrInst>(BI->getTerminator()))
1244      return false;
1245
1246    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
1247         ++II) {
1248      CallSite CS(II);
1249      if (!CS)
1250        continue;
1251
1252      // Disallow recursive calls.
1253      if (&F == CS.getCalledFunction())
1254        return false;
1255
1256      // Disallow calls which expose returns-twice to a function not previously
1257      // attributed as such.
1258      if (!ReturnsTwice && CS.isCall() &&
1259          cast<CallInst>(CS.getInstruction())->canReturnTwice())
1260        return false;
1261    }
1262  }
1263
1264  return true;
1265}
1266