//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// The cache of @llvm.assume intrinsics.
  AssumptionCacheTracker *ACT;

  // The called function.
  Function &F;

  // The candidate callsite being analyzed. Please do not use this to do
  // analysis in the caller function; we want the inline cost query to be
  // easily cacheable. Instead, use the cover function paramHasAttr.
  CallSite CandidateCS;

  int Threshold;
  int Cost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;
  bool HasFrameEscape;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int FiftyPercentVectorBonus, TenPercentVectorBonus;
  int VectorBonus;

  // While we walk the potentially-inlined instructions, we build up and
  // maintain a mapping of simplified values specific to this callsite. The
  // idea is to propagate any special information we have about arguments to
  // this call through the inlinable section of the function, and account for
  // likely simplifications post-inlining. The most important aspect we track
  // is CFG altering simplifications -- when we prove a basic block dead, that
  // can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  // Keep track of the values which map back (through function arguments) to
  // allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  // The mapping of caller Alloca values to their accumulated cost savings. If
  // we have to disable SROA for one of the allocas, this tells us how much
  // cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  // Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  bool isGEPOffsetConstant(GetElementPtrInst &GEP);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallSite CS);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration.  Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  // Custom analysis routines.
  bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);     void visit(Module &);
  void visit(Function *);   void visit(Function &);
  void visit(BasicBlock *); void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
               Function &Callee, int Threshold, CallSite CSArg)
    : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),
        Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
        NumVectorInstructions(0), FiftyPercentVectorBonus(0),
        TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
        NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
        NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
        SROACostSavings(0), SROACostSavingsLost(0) {}

  bool analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// \brief Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// \brief Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
}

/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// \brief Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

/// \brief Check whether a GEP's indices are all constant.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
      return false;

  return true;
}

/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  unsigned IntPtrWidth = DL.getPointerSizeInBits();
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero()) continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

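/// \brief Account for an alloca in the callee.
///
/// Static allocas are accumulated into AllocatedSize so callers can limit
/// stack growth; a dynamic alloca disables inlining altogether.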
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca, and handle that case.
  if (I.isArrayAllocation()) {
    if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
      ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
      assert(AllocSize && "Allocation size not a constant int?");
      Type *Ty = I.getAllocatedType();
      AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    const DataLayout &DL = F.getParent()->getDataLayout();
    Type *Ty = I.getAllocatedType();
    AllocatedSize += DL.getTypeAllocSize(Ty);
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}

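/// \brief Handle PHI nodes, which are currently modeled as free.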
bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We should potentially be tracking values through phi nodes,
  // especially when they collapse to a single value due to deleted CFG edges
  // during inlining.

  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  return true;
}

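/// \brief Fold GEPs whose offsets are constant at this call site.
///
/// Constant GEPs are modeled as free and keep the pointer viable for SROA;
/// variable GEPs disable SROA for the underlying alloca.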
bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
                                            SROAArg, CostIt);

  // Try to fold GEPs of constant-offset call site argument pointers. This
  // requires target data and inbounds GEPs.
  if (I.isInBounds()) {
    // Check if we have a base + offset for the pointer.
    Value *Ptr = I.getPointerOperand();
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
    if (BaseAndOffset.first) {
      // Check if the offset of this GEP is constant, and if so accumulate it
      // into Offset.
      if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
        // Non-constant GEPs aren't folded, and disable SROA.
        if (SROACandidate)
          disableSROA(CostIt);
        return false;
      }

      // Add the result as a new mapping to Base + Offset.
      ConstantOffsetPtrs[&I] = BaseAndOffset;

      // Also handle SROA candidates here, we already know that the GEP is
      // all-constant indexed.
      if (SROACandidate)
        SROAArgValues[&I] = SROAArg;

      return true;
    }
  }

  if (isGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return false;
}

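/// \brief Propagate constants and base/offset pairs through bitcasts, which
/// are modeled as free.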
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offsets through casts
  std::pair<Value *, APInt> BaseAndOffset
    = ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  const DataLayout &DL = F.getParent()->getDataLayout();
  if (IntegerSize >= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset
      = ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  const DataLayout &DL = F.getParent()->getDataLayout();
  if (IntegerSize <= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

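/// \brief Handle casts not covered by a more specific visitor; these disable
/// SROA on their operand.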
bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  Constant *COp = dyn_cast<Constant>(Operand);
  if (!COp)
    COp = SimplifiedValues.lookup(Operand);
  if (COp) {
    const DataLayout &DL = F.getParent()->getDataLayout();
    if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
                                               COp, DL)) {
      SimplifiedValues[&I] = C;
      return true;
    }
  }

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  unsigned ArgNo = A->getArgNo();
  return CandidateCS.paramHasAttr(ArgNo+1, Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument?  We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller?  This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

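/// \brief Try to fold comparisons, including comparisons of constant-offset
/// pointers sharing a common base and null checks against values known to be
/// non-null in the callee.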
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;
  if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        return true;
      }
  }

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }
  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

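/// \brief Fold pointer differences when both operands are constant offsets
/// from a common base; otherwise defer to the generic binary operator logic.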
bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

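/// \brief Try to constant fold binary operators using simplified operands;
/// unsimplified operators disable SROA on their operands.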
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  const DataLayout &DL = F.getParent()->getDataLayout();
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;
  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV =
        SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
  else
    SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
    SimplifiedValues[&I] = C;
    return true;
  }

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  return false;
}

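/// \brief Credit simple loads and stores of SROA candidate pointers to the
/// SROA cost savings; volatile or atomic accesses disable SROA instead.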
bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
  if (!C)
    C = SimplifiedValues.lookup(I.getAggregateOperand());
  if (C) {
    SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
    return true;
  }

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
  if (!AggC)
    AggC = SimplifiedValues.lookup(I.getAggregateOperand());
  Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
  if (!InsertedC)
    InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
  if (AggC && InsertedC) {
    SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
                                                        I.getIndices());
    return true;
  }

  // SROA can look through these but give them a cost.
  return false;
}

/// \brief Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}

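/// \brief Model the cost of a call site within the callee.
///
/// This constant folds calls where possible, handles known intrinsics,
/// detects recursion and other uninlinable patterns, charges argument setup
/// and call penalties, and speculatively analyzes indirect calls that
/// simplify to a known target.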
bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() &&
      cast<CallInst>(CS.getInstruction())->cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        return Base::visitCallSite(CS);

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
      case Intrinsic::localescape:
        HasFrameEscape = true;
        return false;
      }
    }

    if (F == CS.getInstruction()->getParent()->getParent()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F)
    return Base::visitCallSite(CS);

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // threshold to get the bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
  }

  return Base::visitCallSite(CS);
}

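/// \brief Model return instructions: only the first return in the callee is
/// free after inlining.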
bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free, see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Otherwise, we need to accumulate a cost proportional to the number of
  // distinct successor blocks. This fan-out in the CFG cannot be represented
  // for free even if we can represent the core switch as a jumptable that
  // takes a single instruction.
  //
  // NB: We convert large switches which are just used to initialize large phi
  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
  // inlining those. It will prevent inlining in cases where the optimization
  // does not (yet) fire.
  SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
  SuccessorBlocks.insert(SI.getDefaultDest());
  for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
    SuccessorBlocks.insert(I.getCaseSuccessor());
  // Add cost corresponding to the number of distinct destinations. The first
  // we model as free because of fallthrough.
  Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
  return false;
}

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr.  This is
  // incorrect because all the blockaddress's (in static global initializers
  // for example) would be referring to the original function, and this
  // indirect jump would jump from the inlined copy of the function into the
  // original function which is extremely undefined behavior.
  // FIXME: This logic isn't really right; we can safely inline functions with
  // indirectbr's as long as no other function or global references the
  // blockaddress of a block within the current function.
  HasIndirectBr = true;
  return false;
}

bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}

bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a cleanupret instruction.
  return false;
}

bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a catchret instruction.
  return false;
}

bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions leading
  // to unreachable as they have the lowest possible impact on both runtime and
  // code size.
  return true; // No actual code is needed for unreachable.
}

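/// \brief Base case for instructions without a dedicated visitor: unless the
/// target reports them as free, charge the generic instruction cost and
/// disable SROA on any operands that were SROA candidates.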
bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
    return true;

  // We found something we don't understand or can't handle. Mark any SROA-able
  // values in the operand list as no longer viable.
  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
    disableSROA(*OI);

  return false;
}


/// \brief Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
                                SmallPtrSetImpl<const Value *> &EphValues) {
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // FIXME: Currently, the number of instructions in a function, regardless
    // of our ability to simplify them during inlining to constants or dead
    // code, is used by the vector bonus heuristic. As long as that's true,
    // we have to special case debug intrinsics here to prevent differences in
    // inlining due to debug symbols. Eventually, the number of unsimplified
    // instructions shouldn't factor into the cost computation, but until then,
    // hack around it here.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip ephemeral values.
    if (EphValues.count(&*I))
      continue;

    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
      ++NumVectorInstructions;

    // If the instruction is floating point, and the target says this operation
    // is expensive or the function has the "use-soft-float" attribute, this may
    // eventually become a library call. Treat the cost as such.
    if (I->getType()->isFloatingPointTy()) {
      bool hasSoftFloatAttr = false;

      // If the function has the "use-soft-float" attribute, mark it as
      // expensive.
      if (F.hasFnAttribute("use-soft-float")) {
        Attribute Attr = F.getFnAttribute("use-soft-float");
        StringRef Val = Attr.getValueAsString();
        if (Val == "true")
          hasSoftFloatAttr = true;
      }

      if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
          hasSoftFloatAttr)
        Cost += InlineConstants::CallPenalty;
    }

    // If the instruction simplified to a constant, there is no cost to this
    // instruction. Visit the instructions using our InstVisitor to account for
    // all of the per-instruction logic. The visit tree returns true if we
    // consumed the instruction in any way, and false if the instruction's base
    // cost should count against inlining.
    if (Base::visit(&*I))
      ++NumInstructionsSimplified;
    else
      Cost += InlineConstants::InstrCost;

    // If visiting this instruction detected an uninlinable pattern, abort.
    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
        HasIndirectBr || HasFrameEscape)
      return false;

    // If the caller is a recursive function then we don't want to inline
    // functions which allocate a lot of stack space because it would increase
    // the caller stack usage dramatically.
    if (IsCallerRecursive &&
        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
      return false;

    // Check if we've passed the maximum possible threshold so we don't spin in
    // huge basic blocks that will never inline.
    if (Cost > Threshold)
      return false;
  }

  return true;
}

/// \brief Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
  if (!V->getType()->isPointerTy())
    return nullptr;

  const DataLayout &DL = F.getParent()->getDataLayout();
  unsigned IntPtrWidth = DL.getPointerSizeInBits();
  APInt Offset = APInt::getNullValue(IntPtrWidth);

  // Even though we don't look through PHI nodes, we could be called on an
  // instruction in an unreachable block, which may be on a cycle.
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(V);
  do {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
        return nullptr;
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
      V = cast<Operator>(V)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->mayBeOverridden())
        break;
      V = GA->getAliasee();
    } else {
      break;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  } while (Visited.insert(V).second);

  Type *IntPtrTy = DL.getIntPtrType(V->getContext());
  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}

/// \brief Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
bool CallAnalyzer::analyzeCall(CallSite CS) {
  ++NumCallsAnalyzed;

  // Perform some tweaks to the cost and threshold based on the direct
  // callsite information.

  // We want to more aggressively inline vector-dense kernels, so up the
  // threshold, and we'll lower it if the % of vector instructions gets too
  // low. Note that these bonuses are somewhat arbitrary and evolved over time
  // by accident as much as because they are principled bonuses.
  //
  // FIXME: It would be nice to remove all such bonuses. At least it would be
  // nice to base the bonus values on something more scientific.
  assert(NumInstructions == 0);
  assert(NumVectorInstructions == 0);
  FiftyPercentVectorBonus = 3 * Threshold / 2;
  TenPercentVectorBonus = 3 * Threshold / 4;
  const DataLayout &DL = F.getParent()->getDataLayout();

  // Track whether the post-inlining function would have more than one basic
  // block. A single basic block is often intended for inlining. Balloon the
  // threshold by 50% until we pass the single-BB phase.
  bool SingleBB = true;
  int SingleBBBonus = Threshold / 2;

  // Speculatively apply all possible bonuses to Threshold. If cost exceeds
  // this Threshold any time, and cost cannot decrease, we can stop processing
  // the rest of the function body.
  Threshold += (SingleBBBonus + FiftyPercentVectorBonus);

  // Give out bonuses per argument, as the instructions setting them up will
  // be gone after inlining.
  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
    if (CS.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
      unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
      unsigned PointerSize = DL.getPointerSizeInBits();
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy so we take that as an upper bound. Otherwise we assume
      // one load and one store per word copied.
      // FIXME: The maxStoresPerMemcpy setting from the target should be used
      // here instead of a magic number of 8, but it's not available via
      // DataLayout.
      NumStores = std::min(NumStores, 8U);

      Cost -= 2 * NumStores * InlineConstants::InstrCost;
    } else {
      // For non-byval arguments subtract off one instruction per call
      // argument.
      Cost -= InlineConstants::InstrCost;
    }
  }

  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically.
  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
    &F == CS.getCalledFunction();
  if (OnlyOneCallAndLocalLinkage)
    Cost += InlineConstants::LastCallToStaticBonus;

  // If the instruction after the call, or if the normal destination of the
  // invoke is an unreachable instruction, the function is noreturn. As such,
  // there is little point in inlining this unless there is literally zero
  // cost.
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
      Threshold = 0;
  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
    Threshold = 0;

  // If this function uses the coldcc calling convention, prefer not to inline
  // it.
  if (F.getCallingConv() == CallingConv::Cold)
    Cost += InlineConstants::ColdccPenalty;

  // Check if we're done. This can happen due to bonuses and penalties.
  if (Cost > Threshold)
    return false;

  if (F.empty())
    return true;

  Function *Caller = CS.getInstruction()->getParent()->getParent();
  // Check if the caller function is recursive itself.
  for (User *U : Caller->users()) {
    CallSite Site(U);
    if (!Site)
      continue;
    Instruction *I = Site.getInstruction();
    if (I->getParent()->getParent() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  CallSite::arg_iterator CAI = CS.arg_begin();
  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
       FAI != FAE; ++FAI, ++CAI) {
    assert(CAI != CS.arg_end());
    if (Constant *C = dyn_cast<Constant>(CAI))
      SimplifiedValues[&*FAI] = C;

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (isa<AllocaInst>(PtrArg)) {
        SROAArgValues[&*FAI] = PtrArg;
        SROAArgCosts[PtrArg] = 0;
      }
    }
  }
  NumConstantArgs = SimplifiedValues.size();
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();

  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
  // the ephemeral values multiple times (and they're completely determined by
  // the callee, so this is purely duplicate work).
  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues);

  // The worklist of live basic blocks in the callee *after* inlining. We avoid
  // adding basic blocks of the callee which can be proven to be dead for this
  // particular call site in order to get more accurate cost estimates. This
  // requires a somewhat heavyweight iteration pattern: we need to walk the
  // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, prioritizing for small iterations because we exit after
  // crossing our threshold, we use a small-size optimized SetVector.
  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
                                  SmallPtrSet<BasicBlock *, 16> > BBSetVector;
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());
  // Note that we *must not* cache the size, this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost > Threshold)
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    // Disallow inlining a blockaddress. A blockaddress only has defined
    // behavior for an indirect branch in the same function, and we do not
    // currently support inlining indirect branches. But, the inliner may not
    // see an indirect branch that ends up being dead code at a particular call
    // site. If the blockaddress escapes the function, e.g., via a global
    // variable, inlining may lead to an invalid cross-function reference.
    if (BB->hasAddressTaken())
      return false;

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail on out.
    if (!analyzeBlock(BB, EphValues)) {
      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
          HasIndirectBr || HasFrameEscape)
        return false;

      // If the caller is a recursive function then we don't want to inline
      // functions which allocate a lot of stack space because it would increase
      // the caller stack usage dramatically.
      if (IsCallerRecursive &&
          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
        return false;

      break;
    }

    TerminatorInst *TI = BB->getTerminator();

    // Add in the live successors by first checking whether we have terminator
    // that may be simplified based on the values simplified by this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond
              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond
            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
        continue;
      }
    }

    // If we're unable to select a particular successor, just count all of
    // them.
    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
         ++TIdx)
      BBWorklist.insert(TI->getSuccessor(TIdx));

    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  // If this is a noduplicate call, we can still inline as long as
  // inlining this would cause the removal of the caller (so the instruction
  // is not actually duplicated, just moved).
  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
    return false;

  // We applied the maximum possible vector bonus at the beginning. Now,
  // subtract the excess bonus, if any, from the Threshold before
  // comparing against Cost.
  if (NumVectorInstructions <= NumInstructions / 10)
    Threshold -= FiftyPercentVectorBonus;
  else if (NumVectorInstructions <= NumInstructions / 2)
    Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);

  return Cost <= std::max(0, Threshold);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Dump stats about this call's analysis.
void CallAnalyzer::dump() {
#define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
  DEBUG_PRINT_STAT(NumConstantArgs);
  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
  DEBUG_PRINT_STAT(NumAllocaArgs);
  DEBUG_PRINT_STAT(NumConstantPtrCmps);
  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
  DEBUG_PRINT_STAT(NumInstructionsSimplified);
  DEBUG_PRINT_STAT(NumInstructions);
  DEBUG_PRINT_STAT(SROACostSavings);
  DEBUG_PRINT_STAT(SROACostSavingsLost);
  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
  DEBUG_PRINT_STAT(Cost);
  DEBUG_PRINT_STAT(Threshold);
#undef DEBUG_PRINT_STAT
}
#endif

INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
                      true, true)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
                    true, true)

char InlineCostAnalysis::ID = 0;

InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}

InlineCostAnalysis::~InlineCostAnalysis() {}

void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  CallGraphSCCPass::getAnalysisUsage(AU);
}

bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
  ACT = &getAnalysis<AssumptionCacheTracker>();
  return false;
}

InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
  return getInlineCost(CS, CS.getCalledFunction(), Threshold);
}

/// \brief Test that two functions either both have or both lack the given
///        attribute.
template<typename AttrKind>
static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
  return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
}

/// \brief Test that there are no attribute conflicts between Caller and Callee
///        that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
                                              Function *Callee,
                                              TargetTransformInfo &TTI) {
  return TTI.areInlineCompatible(Caller, Callee) &&
         attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
         attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
         attributeMatches(Caller, Callee, Attribute::SanitizeThread);
}

InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
                                             int Threshold) {
  // Cannot inline indirect calls.
  if (!Callee)
    return llvm::InlineCost::getNever();

  // Calls to functions with always-inline attributes should be inlined
  // whenever possible.
  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
    if (isInlineViable(*Callee))
      return llvm::InlineCost::getAlways();
    return llvm::InlineCost::getNever();
  }

  // Never inline functions with conflicting attributes (unless callee has
  // always-inline attribute).
  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee,
                                         TTIWP->getTTI(*Callee)))
    return llvm::InlineCost::getNever();

  // Don't inline this call if the caller has the optnone attribute.
  if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
    return llvm::InlineCost::getNever();

  // Don't inline functions which can be redefined at link-time to mean
  // something else.  Don't inline functions marked noinline or call sites
  // marked noinline.
  if (Callee->mayBeOverridden() ||
      Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
    return llvm::InlineCost::getNever();

  DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
        << "...\n");

  CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold, CS);
  bool ShouldInline = CA.analyzeCall(CS);

  DEBUG(CA.dump());

  // Check if there was a reason to force inlining or no inlining.
  if (!ShouldInline && CA.getCost() < CA.getThreshold())
    return InlineCost::getNever();
  if (ShouldInline && CA.getCost() >= CA.getThreshold())
    return InlineCost::getAlways();

  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}

bool InlineCostAnalysis::isInlineViable(Function &F) {
  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    // Disallow inlining of functions which contain indirect branches or
    // blockaddresses.
    if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
      return false;

    for (auto &II : *BI) {
      CallSite CS(&II);
      if (!CS)
        continue;

      // Disallow recursive calls.
      if (&F == CS.getCalledFunction())
        return false;

      // Disallow calls which expose returns-twice to a function not previously
      // attributed as such.
      if (!ReturnsTwice && CS.isCall() &&
          cast<CallInst>(CS.getInstruction())->canReturnTwice())
        return false;

      // Disallow inlining functions that call @llvm.localescape. Doing this
      // correctly would require major changes to the inliner.
      if (CS.getCalledFunction() &&
          CS.getCalledFunction()->getIntrinsicID() ==
              llvm::Intrinsic::localescape)
        return false;
    }
  }

  return true;
}