InlineCost.cpp revision 37ed9c199ca639565f6ce88105f9e39e898d82d0
1//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements inline cost analysis.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/InlineCost.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/SetVector.h"
17#include "llvm/ADT/SmallPtrSet.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/Analysis/AssumptionTracker.h"
21#include "llvm/Analysis/ConstantFolding.h"
22#include "llvm/Analysis/CodeMetrics.h"
23#include "llvm/Analysis/InstructionSimplify.h"
24#include "llvm/Analysis/TargetTransformInfo.h"
25#include "llvm/IR/CallSite.h"
26#include "llvm/IR/CallingConv.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/GetElementPtrTypeIterator.h"
29#include "llvm/IR/GlobalAlias.h"
30#include "llvm/IR/InstVisitor.h"
31#include "llvm/IR/IntrinsicInst.h"
32#include "llvm/IR/Operator.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/raw_ostream.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "inline-cost"
39
40STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
41
42namespace {
43
44class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
45  typedef InstVisitor<CallAnalyzer, bool> Base;
46  friend class InstVisitor<CallAnalyzer, bool>;
47
48  // DataLayout if available, or null.
49  const DataLayout *const DL;
50
51  /// The TargetTransformInfo available for this compilation.
52  const TargetTransformInfo &TTI;
53
54  /// The cache of @llvm.assume intrinsics.
55  AssumptionTracker *AT;
56
57  // The called function.
58  Function &F;
59
60  int Threshold;
61  int Cost;
62
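  // Properties of the caller, callee, and call site discovered during the
  // analysis. A recursive call, an exposed returns_twice call, a dynamic
  // alloca, or an indirectbr in the callee aborts the analysis outright.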
63  bool IsCallerRecursive;
64  bool IsRecursiveCall;
65  bool ExposesReturnsTwice;
66  bool HasDynamicAlloca;
67  bool ContainsNoDuplicateCall;
68  bool HasReturn;
69  bool HasIndirectBr;
70
71  /// Number of bytes allocated statically by the callee.
72  uint64_t AllocatedSize;
73  unsigned NumInstructions, NumVectorInstructions;
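  // Candidate bonuses for vector-heavy callees. The candidate values are
  // computed in analyzeCall; VectorBonus is the one currently in effect,
  // selected in analyzeBlock from the fraction of vector instructions seen.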
74  int FiftyPercentVectorBonus, TenPercentVectorBonus;
75  int VectorBonus;
76
77  // While we walk the potentially-inlined instructions, we build up and
78  // maintain a mapping of simplified values specific to this callsite. The
79  // idea is to propagate any special information we have about arguments to
80  // this call through the inlinable section of the function, and account for
81  // likely simplifications post-inlining. The most important aspect we track
82  // is CFG altering simplifications -- when we prove a basic block dead, that
83  // can cause dramatic shifts in the cost of inlining a function.
84  DenseMap<Value *, Constant *> SimplifiedValues;
85
86  // Keep track of the values which map back (through function arguments) to
87  // allocas on the caller stack which could be simplified through SROA.
88  DenseMap<Value *, Value *> SROAArgValues;
89
90  // The mapping of caller Alloca values to their accumulated cost savings. If
91  // we have to disable SROA for one of the allocas, this tells us how much
92  // cost must be added.
93  DenseMap<Value *, int> SROAArgCosts;
94
95  // Keep track of values which map to a pointer base and constant offset.
96  DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
97
98  // Custom simplification helper routines.
99  bool isAllocaDerivedArg(Value *V);
100  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
101                            DenseMap<Value *, int>::iterator &CostIt);
102  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
103  void disableSROA(Value *V);
104  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
105                          int InstructionCost);
106  bool isGEPOffsetConstant(GetElementPtrInst &GEP);
107  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
108  bool simplifyCallSite(Function *F, CallSite CS);
109  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
110
111  // Custom analysis routines.
112  bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
113
114  // Disable several entry points to the visitor so we don't accidentally use
115  // them by declaring but not defining them here.
116  void visit(Module *);     void visit(Module &);
117  void visit(Function *);   void visit(Function &);
118  void visit(BasicBlock *); void visit(BasicBlock &);
119
120  // Provide base case for our instruction visit.
121  bool visitInstruction(Instruction &I);
122
123  // Our visit overrides.
124  bool visitAlloca(AllocaInst &I);
125  bool visitPHI(PHINode &I);
126  bool visitGetElementPtr(GetElementPtrInst &I);
127  bool visitBitCast(BitCastInst &I);
128  bool visitPtrToInt(PtrToIntInst &I);
129  bool visitIntToPtr(IntToPtrInst &I);
130  bool visitCastInst(CastInst &I);
131  bool visitUnaryInstruction(UnaryInstruction &I);
132  bool visitCmpInst(CmpInst &I);
133  bool visitSub(BinaryOperator &I);
134  bool visitBinaryOperator(BinaryOperator &I);
135  bool visitLoad(LoadInst &I);
136  bool visitStore(StoreInst &I);
137  bool visitExtractValue(ExtractValueInst &I);
138  bool visitInsertValue(InsertValueInst &I);
139  bool visitCallSite(CallSite CS);
140  bool visitReturnInst(ReturnInst &RI);
141  bool visitBranchInst(BranchInst &BI);
142  bool visitSwitchInst(SwitchInst &SI);
143  bool visitIndirectBrInst(IndirectBrInst &IBI);
144  bool visitResumeInst(ResumeInst &RI);
145  bool visitUnreachableInst(UnreachableInst &I);
146
147public:
148  CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
149               AssumptionTracker *AT, Function &Callee, int Threshold)
150      : DL(DL), TTI(TTI), AT(AT), F(Callee), Threshold(Threshold), Cost(0),
151        IsCallerRecursive(false), IsRecursiveCall(false),
152        ExposesReturnsTwice(false), HasDynamicAlloca(false),
153        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
154        AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
155        FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
156        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
157        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
158        NumInstructionsSimplified(0), SROACostSavings(0),
159        SROACostSavingsLost(0) {}
160
161  bool analyzeCall(CallSite CS);
162
163  int getThreshold() { return Threshold; }
164  int getCost() { return Cost; }
165
166  // Keep a bunch of stats about the cost savings found so we can print them
167  // out when debugging.
168  unsigned NumConstantArgs;
169  unsigned NumConstantOffsetPtrArgs;
170  unsigned NumAllocaArgs;
171  unsigned NumConstantPtrCmps;
172  unsigned NumConstantPtrDiffs;
173  unsigned NumInstructionsSimplified;
174  unsigned SROACostSavings;
175  unsigned SROACostSavingsLost;
176
177  void dump();
178};
179
180} // namespace
181
182/// \brief Test whether the given value is an Alloca-derived function argument.
183bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
184  return SROAArgValues.count(V);
185}
186
187/// \brief Look up the SROA-candidate argument and cost iterator which V maps to.
188/// Returns false if V does not map to a SROA-candidate.
189bool CallAnalyzer::lookupSROAArgAndCost(
190    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
191  if (SROAArgValues.empty() || SROAArgCosts.empty())
192    return false;
193
194  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
195  if (ArgIt == SROAArgValues.end())
196    return false;
197
198  Arg = ArgIt->second;
199  CostIt = SROAArgCosts.find(Arg);
200  return CostIt != SROAArgCosts.end();
201}
202
203/// \brief Disable SROA for the candidate marked by this cost iterator.
204///
205/// This marks the candidate as no longer viable for SROA, and adds the cost
206/// savings associated with it back into the inline cost measurement.
207void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
208  // If we're no longer able to perform SROA we need to undo its cost savings
209  // and prevent subsequent analysis.
210  Cost += CostIt->second;
211  SROACostSavings -= CostIt->second;
212  SROACostSavingsLost += CostIt->second;
213  SROAArgCosts.erase(CostIt);
214}
215
216/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
217void CallAnalyzer::disableSROA(Value *V) {
218  Value *SROAArg;
219  DenseMap<Value *, int>::iterator CostIt;
220  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
221    disableSROA(CostIt);
222}
223
224/// \brief Accumulate the given cost for a particular SROA candidate.
225void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
226                                      int InstructionCost) {
227  CostIt->second += InstructionCost;
228  SROACostSavings += InstructionCost;
229}
230
231/// \brief Check whether a GEP's indices are all constant.
232///
233/// Respects any simplified values known during the analysis of this callsite.
234bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
235  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
236    if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
237      return false;
238
239  return true;
240}
241
242/// \brief Accumulate a constant GEP offset into an APInt if possible.
243///
244/// Returns false if unable to compute the offset for any reason. Respects any
245/// simplified values known during the analysis of this callsite.
246bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
247  if (!DL)
248    return false;
249
250  unsigned IntPtrWidth = DL->getPointerSizeInBits();
251  assert(IntPtrWidth == Offset.getBitWidth());
252
253  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
254       GTI != GTE; ++GTI) {
255    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
256    if (!OpC)
257      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
258        OpC = dyn_cast<ConstantInt>(SimpleOp);
259    if (!OpC)
260      return false;
261    if (OpC->isZero()) continue;
262
263    // Handle a struct index, which adds its field offset to the pointer.
264    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
265      unsigned ElementIdx = OpC->getZExtValue();
266      const StructLayout *SL = DL->getStructLayout(STy);
267      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
268      continue;
269    }
270
271    APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType()));
272    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
273  }
274  return true;
275}
276
277bool CallAnalyzer::visitAlloca(AllocaInst &I) {
278  // Check whether inlining will turn a dynamic alloca into a static
279  // alloca, and handle that case.
280  if (I.isArrayAllocation()) {
281    if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
282      ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
283      assert(AllocSize && "Allocation size not a constant int?");
284      Type *Ty = I.getAllocatedType();
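      // FIXME: getPrimitiveSizeInBits() yields a size in bits, while the
      // static-alloca path below uses DL->getTypeAllocSize(), which is in
      // bytes, so the units accumulated into AllocatedSize are inconsistent.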
285      AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
286      return Base::visitAlloca(I);
287    }
288  }
289
290  // Accumulate the allocated size.
291  if (I.isStaticAlloca()) {
292    Type *Ty = I.getAllocatedType();
293    AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) :
294                      Ty->getPrimitiveSizeInBits());
295  }
296
297  // We will happily inline static alloca instructions.
298  if (I.isStaticAlloca())
299    return Base::visitAlloca(I);
300
301  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
302  // a variety of reasons, and so we would like to not inline them into
303  // functions which don't currently have a dynamic alloca. This simply
304  // disables inlining altogether in the presence of a dynamic alloca.
305  HasDynamicAlloca = true;
306  return false;
307}
308
309bool CallAnalyzer::visitPHI(PHINode &I) {
310  // FIXME: We should potentially be tracking values through phi nodes,
311  // especially when they collapse to a single value due to deleted CFG edges
312  // during inlining.
313
314  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
315  // though we don't want to propagate its bonuses. The idea is to disable
316  // SROA if it *might* be used in an inappropriate manner.
317
318  // Phi nodes are always zero-cost.
319  return true;
320}
321
322bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
323  Value *SROAArg;
324  DenseMap<Value *, int>::iterator CostIt;
325  bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
326                                            SROAArg, CostIt);
327
328  // Try to fold GEPs of constant-offset call site argument pointers. This
329  // requires target data and inbounds GEPs.
330  if (DL && I.isInBounds()) {
331    // Check if we have a base + offset for the pointer.
332    Value *Ptr = I.getPointerOperand();
333    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
334    if (BaseAndOffset.first) {
335      // Check if the offset of this GEP is constant, and if so accumulate it
336      // into Offset.
337      if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
338        // Non-constant GEPs aren't folded, and disable SROA.
339        if (SROACandidate)
340          disableSROA(CostIt);
341        return false;
342      }
343
344      // Add the result as a new mapping to Base + Offset.
345      ConstantOffsetPtrs[&I] = BaseAndOffset;
346
347      // Also handle SROA candidates here; we already know that the GEP is
348      // all-constant indexed.
349      if (SROACandidate)
350        SROAArgValues[&I] = SROAArg;
351
352      return true;
353    }
354  }
355
356  if (isGEPOffsetConstant(I)) {
357    if (SROACandidate)
358      SROAArgValues[&I] = SROAArg;
359
360    // Constant GEPs are modeled as free.
361    return true;
362  }
363
364  // Variable GEPs will require math and will disable SROA.
365  if (SROACandidate)
366    disableSROA(CostIt);
367  return false;
368}
369
370bool CallAnalyzer::visitBitCast(BitCastInst &I) {
371  // Propagate constants through bitcasts.
372  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
373  if (!COp)
374    COp = SimplifiedValues.lookup(I.getOperand(0));
375  if (COp)
376    if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
377      SimplifiedValues[&I] = C;
378      return true;
379    }
380
381  // Track base/offsets through casts
382  std::pair<Value *, APInt> BaseAndOffset
383    = ConstantOffsetPtrs.lookup(I.getOperand(0));
384  // Casts don't change the offset, just wrap it up.
385  if (BaseAndOffset.first)
386    ConstantOffsetPtrs[&I] = BaseAndOffset;
387
388  // Also look for SROA candidates here.
389  Value *SROAArg;
390  DenseMap<Value *, int>::iterator CostIt;
391  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
392    SROAArgValues[&I] = SROAArg;
393
394  // Bitcasts are always zero cost.
395  return true;
396}
397
398bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
399  const DataLayout *DL = I.getDataLayout();
400  // Propagate constants through ptrtoint.
401  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
402  if (!COp)
403    COp = SimplifiedValues.lookup(I.getOperand(0));
404  if (COp)
405    if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
406      SimplifiedValues[&I] = C;
407      return true;
408    }
409
410  // Track base/offset pairs when converted to a plain integer provided the
411  // integer is large enough to represent the pointer.
412  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
413  if (DL && IntegerSize >= DL->getPointerSizeInBits()) {
414    std::pair<Value *, APInt> BaseAndOffset
415      = ConstantOffsetPtrs.lookup(I.getOperand(0));
416    if (BaseAndOffset.first)
417      ConstantOffsetPtrs[&I] = BaseAndOffset;
418  }
419
420  // This is really weird. Technically, ptrtoint will disable SROA. However,
421  // unless that ptrtoint is *used* somewhere in the live basic blocks after
422  // inlining, it will be nuked, and SROA should proceed. All of the uses which
423  // would block SROA would also block SROA if applied directly to a pointer,
424  // and so we can just add the integer in here. The only places where SROA is
425  // preserved either cannot fire on an integer, or won't in-and-of themselves
426  // disable SROA (ext) w/o some later use that we would see and disable.
427  Value *SROAArg;
428  DenseMap<Value *, int>::iterator CostIt;
429  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
430    SROAArgValues[&I] = SROAArg;
431
432  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
433}
434
435bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
436  const DataLayout *DL = I.getDataLayout();
437  // Propagate constants through inttoptr.
438  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
439  if (!COp)
440    COp = SimplifiedValues.lookup(I.getOperand(0));
441  if (COp)
442    if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
443      SimplifiedValues[&I] = C;
444      return true;
445    }
446
447  // Track base/offset pairs when round-tripped through a pointer without
448  // modifications provided the integer is not too large.
449  Value *Op = I.getOperand(0);
450  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
451  if (DL && IntegerSize <= DL->getPointerSizeInBits()) {
452    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
453    if (BaseAndOffset.first)
454      ConstantOffsetPtrs[&I] = BaseAndOffset;
455  }
456
457  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
458  Value *SROAArg;
459  DenseMap<Value *, int>::iterator CostIt;
460  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
461    SROAArgValues[&I] = SROAArg;
462
463  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
464}
465
466bool CallAnalyzer::visitCastInst(CastInst &I) {
467  // Propagate constants through casts.
468  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
469  if (!COp)
470    COp = SimplifiedValues.lookup(I.getOperand(0));
471  if (COp)
472    if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
473      SimplifiedValues[&I] = C;
474      return true;
475    }
476
477  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
478  disableSROA(I.getOperand(0));
479
480  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
481}
482
483bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
484  Value *Operand = I.getOperand(0);
485  Constant *COp = dyn_cast<Constant>(Operand);
486  if (!COp)
487    COp = SimplifiedValues.lookup(Operand);
488  if (COp)
489    if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
490                                               COp, DL)) {
491      SimplifiedValues[&I] = C;
492      return true;
493    }
494
495  // Disable any SROA on the argument to arbitrary unary operators.
496  disableSROA(Operand);
497
498  return false;
499}
500
501bool CallAnalyzer::visitCmpInst(CmpInst &I) {
502  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
503  // First try to handle simplified comparisons.
504  if (!isa<Constant>(LHS))
505    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
506      LHS = SimpleLHS;
507  if (!isa<Constant>(RHS))
508    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
509      RHS = SimpleRHS;
510  if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
511    if (Constant *CRHS = dyn_cast<Constant>(RHS))
512      if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
513        SimplifiedValues[&I] = C;
514        return true;
515      }
516  }
517
518  if (I.getOpcode() == Instruction::FCmp)
519    return false;
520
521  // Otherwise look for a comparison between constant offset pointers with
522  // a common base.
523  Value *LHSBase, *RHSBase;
524  APInt LHSOffset, RHSOffset;
525  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
526  if (LHSBase) {
527    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
528    if (RHSBase && LHSBase == RHSBase) {
529      // We have common bases, fold the icmp to a constant based on the
530      // offsets.
531      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
532      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
533      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
534        SimplifiedValues[&I] = C;
535        ++NumConstantPtrCmps;
536        return true;
537      }
538    }
539  }
540
541  // If the comparison is an equality comparison with null, we can simplify it
542  // for any alloca-derived argument.
543  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
544    if (isAllocaDerivedArg(I.getOperand(0))) {
545      // We can actually predict the result of comparisons between an
546      // alloca-derived value and null. Note that this fires regardless of
547      // SROA firing.
548      bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
549      SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
550                                        : ConstantInt::getFalse(I.getType());
551      return true;
552    }
553
554  // Finally check for SROA candidates in comparisons.
555  Value *SROAArg;
556  DenseMap<Value *, int>::iterator CostIt;
557  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
558    if (isa<ConstantPointerNull>(I.getOperand(1))) {
559      accumulateSROACost(CostIt, InlineConstants::InstrCost);
560      return true;
561    }
562
563    disableSROA(CostIt);
564  }
565
566  return false;
567}
568
569bool CallAnalyzer::visitSub(BinaryOperator &I) {
570  // Try to handle a special case: we can fold computing the difference of two
571  // constant-related pointers.
572  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
573  Value *LHSBase, *RHSBase;
574  APInt LHSOffset, RHSOffset;
575  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
576  if (LHSBase) {
577    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
578    if (RHSBase && LHSBase == RHSBase) {
579      // We have common bases, fold the subtract to a constant based on the
580      // offsets.
581      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
582      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
583      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
584        SimplifiedValues[&I] = C;
585        ++NumConstantPtrDiffs;
586        return true;
587      }
588    }
589  }
590
591  // Otherwise, fall back to the generic logic for simplifying and handling
592  // instructions.
593  return Base::visitSub(I);
594}
595
596bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
597  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
598  if (!isa<Constant>(LHS))
599    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
600      LHS = SimpleLHS;
601  if (!isa<Constant>(RHS))
602    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
603      RHS = SimpleRHS;
604  Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
605  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
606    SimplifiedValues[&I] = C;
607    return true;
608  }
609
610  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
611  disableSROA(LHS);
612  disableSROA(RHS);
613
614  return false;
615}
616
617bool CallAnalyzer::visitLoad(LoadInst &I) {
618  Value *SROAArg;
619  DenseMap<Value *, int>::iterator CostIt;
620  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
621    if (I.isSimple()) {
622      accumulateSROACost(CostIt, InlineConstants::InstrCost);
623      return true;
624    }
625
626    disableSROA(CostIt);
627  }
628
629  return false;
630}
631
632bool CallAnalyzer::visitStore(StoreInst &I) {
633  Value *SROAArg;
634  DenseMap<Value *, int>::iterator CostIt;
635  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
636    if (I.isSimple()) {
637      accumulateSROACost(CostIt, InlineConstants::InstrCost);
638      return true;
639    }
640
641    disableSROA(CostIt);
642  }
643
644  return false;
645}
646
647bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
648  // Constant folding for extract value is trivial.
649  Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
650  if (!C)
651    C = SimplifiedValues.lookup(I.getAggregateOperand());
652  if (C) {
653    SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
654    return true;
655  }
656
657  // SROA can look through these but give them a cost.
658  return false;
659}
660
661bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
662  // Constant folding for insert value is trivial.
663  Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
664  if (!AggC)
665    AggC = SimplifiedValues.lookup(I.getAggregateOperand());
666  Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
667  if (!InsertedC)
668    InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
669  if (AggC && InsertedC) {
670    SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
671                                                        I.getIndices());
672    return true;
673  }
674
675  // SROA can look through these but give them a cost.
676  return false;
677}
678
679/// \brief Try to simplify a call site.
680///
681/// Takes a concrete function and callsite and tries to actually simplify it by
682/// analyzing the arguments and call itself with instsimplify. Returns true if
683/// it has simplified the callsite to some other entity (a constant), making it
684/// free.
685bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
686  // FIXME: Using the instsimplify logic directly for this is inefficient
687  // because we have to continually rebuild the argument list even when no
688  // simplifications can be performed. Until that is fixed with remapping
689  // inside of instsimplify, directly constant fold calls here.
690  if (!canConstantFoldCallTo(F))
691    return false;
692
693  // Try to re-map the arguments to constants.
694  SmallVector<Constant *, 4> ConstantArgs;
695  ConstantArgs.reserve(CS.arg_size());
696  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
697       I != E; ++I) {
698    Constant *C = dyn_cast<Constant>(*I);
699    if (!C)
700      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
701    if (!C)
702      return false; // This argument doesn't map to a constant.
703
704    ConstantArgs.push_back(C);
705  }
706  if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
707    SimplifiedValues[CS.getInstruction()] = C;
708    return true;
709  }
710
711  return false;
712}
713
714bool CallAnalyzer::visitCallSite(CallSite CS) {
715  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
716      !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
717                                      Attribute::ReturnsTwice)) {
718    // This aborts the entire analysis.
719    ExposesReturnsTwice = true;
720    return false;
721  }
722  if (CS.isCall() &&
723      cast<CallInst>(CS.getInstruction())->cannotDuplicate())
724    ContainsNoDuplicateCall = true;
725
726  if (Function *F = CS.getCalledFunction()) {
727    // When we have a concrete function, first try to simplify it directly.
728    if (simplifyCallSite(F, CS))
729      return true;
730
731    // Next check if it is an intrinsic we know about.
732    // FIXME: Lift this into part of the InstVisitor.
733    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
734      switch (II->getIntrinsicID()) {
735      default:
736        return Base::visitCallSite(CS);
737
738      case Intrinsic::memset:
739      case Intrinsic::memcpy:
740      case Intrinsic::memmove:
741        // SROA can usually chew through these intrinsics, but they aren't free.
742        return false;
743      }
744    }
745
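    // The callee contains a direct call to itself. Inlining it would simply
    // duplicate the recursive call, so record that and abort the analysis.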
746    if (F == CS.getInstruction()->getParent()->getParent()) {
747      // This flag will fully abort the analysis, so don't bother with anything
748      // else.
749      IsRecursiveCall = true;
750      return false;
751    }
752
753    if (TTI.isLoweredToCall(F)) {
754      // We account for the average 1 instruction per call argument setup
755      // here.
756      Cost += CS.arg_size() * InlineConstants::InstrCost;
757
758      // Everything other than inline ASM will also have a significant cost
759      // merely from making the call.
760      if (!isa<InlineAsm>(CS.getCalledValue()))
761        Cost += InlineConstants::CallPenalty;
762    }
763
764    return Base::visitCallSite(CS);
765  }
766
767  // Otherwise we're in a very special case -- an indirect function call. See
768  // if we can be particularly clever about this.
769  Value *Callee = CS.getCalledValue();
770
771  // First, pay the price of the argument setup. We account for the average
772  // 1 instruction per call argument setup here.
773  Cost += CS.arg_size() * InlineConstants::InstrCost;
774
775  // Next, check if this happens to be an indirect function call to a known
776  // function in this inline context. If not, we've done all we can.
777  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
778  if (!F)
779    return Base::visitCallSite(CS);
780
781  // If we have a constant that we are calling as a function, we can peer
782  // through it and see the function target. This happens not infrequently
783  // during devirtualization and so we want to give it a hefty bonus for
784  // inlining, but cap that bonus in the event that inlining wouldn't pan
785  // out. Pretend to inline the function, with a custom threshold.
786  CallAnalyzer CA(DL, TTI, AT, *F, InlineConstants::IndirectCallThreshold);
787  if (CA.analyzeCall(CS)) {
788    // We were able to inline the indirect call! Subtract the cost from the
789    // bonus we want to apply, but don't go below zero.
790    Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
791  }
792
793  return Base::visitCallSite(CS);
794}
795
796bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
797  // At least one return instruction will be free after inlining.
798  bool Free = !HasReturn;
799  HasReturn = true;
800  return Free;
801}
802
803bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
804  // We model unconditional branches as essentially free -- they really
805  // shouldn't exist at all, but handling them makes the behavior of the
806  // inliner more regular and predictable. Interestingly, conditional branches
807  // which will fold away are also free.
808  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
809         dyn_cast_or_null<ConstantInt>(
810             SimplifiedValues.lookup(BI.getCondition()));
811}
812
813bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
814  // We model unconditional switches as free, see the comments on handling
815  // branches.
816  if (isa<ConstantInt>(SI.getCondition()))
817    return true;
818  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
819    if (isa<ConstantInt>(V))
820      return true;
821
822  // Otherwise, we need to accumulate a cost proportional to the number of
823  // distinct successor blocks. This fan-out in the CFG cannot be represented
824  // for free even if we can represent the core switch as a jumptable that
825  // takes a single instruction.
826  //
827  // NB: We convert large switches which are just used to initialize large phi
828  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
829  // inlining those. It will prevent inlining in cases where the optimization
830  // does not (yet) fire.
831  SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
832  SuccessorBlocks.insert(SI.getDefaultDest());
833  for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
834    SuccessorBlocks.insert(I.getCaseSuccessor());
835  // Add cost corresponding to the number of distinct destinations. The first
836  // we model as free because of fallthrough.
837  Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
838  return false;
839}
840
841bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
842  // We never want to inline functions that contain an indirectbr.  Inlining
843  // one would be incorrect because all the blockaddresses (in static global
844  // initializers, for example) would still refer to the original function,
845  // and the indirect jump would jump from the inlined copy of the function
846  // into the original function, which is undefined behavior.
847  // FIXME: This logic isn't really right; we can safely inline functions with
848  // indirectbr's as long as no other function or global references the
849  // blockaddress of a block within the current function.
850  HasIndirectBr = true;
851  return false;
852}
853
854bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
855  // FIXME: It's not clear that a single instruction is an accurate model for
856  // the inline cost of a resume instruction.
857  return false;
858}
859
860bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
861  // FIXME: It might be reasonable to discount the cost of instructions leading
862  // to unreachable as they have the lowest possible impact on both runtime and
863  // code size.
864  return true; // No actual code is needed for unreachable.
865}
866
867bool CallAnalyzer::visitInstruction(Instruction &I) {
868  // Some instructions are free. All of the free intrinsics can also be
869  // handled by SROA, etc.
870  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
871    return true;
872
873  // We found something we don't understand or can't handle. Mark any SROA-able
874  // values in the operand list as no longer viable.
875  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
876    disableSROA(*OI);
877
878  return false;
879}
880
881
882/// \brief Analyze a basic block for its contribution to the inline cost.
883///
884/// This method walks the analyzer over every instruction in the given basic
885/// block and accounts for their cost during inlining at this callsite. It
886/// aborts early if the threshold has been exceeded or an impossible to inline
887/// construct has been detected. It returns false if inlining is no longer
888/// viable, and true if inlining remains viable.
889bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
890                                SmallPtrSetImpl<const Value *> &EphValues) {
891  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
892    // FIXME: Currently, the number of instructions in a function, regardless
893    // of our ability to simplify them during inlining to constants or dead
894    // code, is actually used by the vector bonus heuristic. As long as that's true,
895    // we have to special case debug intrinsics here to prevent differences in
896    // inlining due to debug symbols. Eventually, the number of unsimplified
897    // instructions shouldn't factor into the cost computation, but until then,
898    // hack around it here.
899    if (isa<DbgInfoIntrinsic>(I))
900      continue;
901
902    // Skip ephemeral values.
903    if (EphValues.count(I))
904      continue;
905
906    ++NumInstructions;
907    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
908      ++NumVectorInstructions;
909
910    // If the instruction simplified to a constant, there is no cost to this
911    // instruction. Visit the instructions using our InstVisitor to account for
912    // all of the per-instruction logic. The visit tree returns true if we
913    // consumed the instruction in any way, and false if the instruction's base
914    // cost should count against inlining.
915    if (Base::visit(I))
916      ++NumInstructionsSimplified;
917    else
918      Cost += InlineConstants::InstrCost;
919
920    // If visiting this instruction detected an uninlinable pattern, abort.
921    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
922        HasIndirectBr)
923      return false;
924
925    // If the caller is a recursive function then we don't want to inline
926    // functions which allocate a lot of stack space because it would increase
927    // the caller stack usage dramatically.
928    if (IsCallerRecursive &&
929        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
930      return false;
931
932    if (NumVectorInstructions > NumInstructions/2)
933      VectorBonus = FiftyPercentVectorBonus;
934    else if (NumVectorInstructions > NumInstructions/10)
935      VectorBonus = TenPercentVectorBonus;
936    else
937      VectorBonus = 0;
938
939    // Check if we've passed the threshold so we don't spin in huge basic
940    // blocks that will never inline.
941    if (Cost > (Threshold + VectorBonus))
942      return false;
943  }
944
945  return true;
946}
947
948/// \brief Compute the base pointer and cumulative constant offsets for V.
949///
950/// This strips all constant offsets off of V, leaving it the base pointer, and
951/// accumulates the total constant offset applied in the returned constant. It
952/// returns 0 if V is not a pointer, and returns the constant '0' if there are
953/// no constant offsets applied.
954ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
955  if (!DL || !V->getType()->isPointerTy())
956    return nullptr;
957
958  unsigned IntPtrWidth = DL->getPointerSizeInBits();
959  APInt Offset = APInt::getNullValue(IntPtrWidth);
960
961  // Even though we don't look through PHI nodes, we could be called on an
962  // instruction in an unreachable block, which may be on a cycle.
963  SmallPtrSet<Value *, 4> Visited;
964  Visited.insert(V);
965  do {
966    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
967      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
968        return nullptr;
969      V = GEP->getPointerOperand();
970    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
971      V = cast<Operator>(V)->getOperand(0);
972    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
973      if (GA->mayBeOverridden())
974        break;
975      V = GA->getAliasee();
976    } else {
977      break;
978    }
979    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
980  } while (Visited.insert(V).second);
981
982  Type *IntPtrTy = DL->getIntPtrType(V->getContext());
983  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
984}
985
986/// \brief Analyze a call site for potential inlining.
987///
988/// Returns true if inlining this call is viable, and false if it is not
989/// viable. It computes the cost and adjusts the threshold based on numerous
990/// factors and heuristics. If this method returns false but the computed cost
991/// is below the computed threshold, then inlining was forcibly disabled by
992/// some artifact of the routine.
993bool CallAnalyzer::analyzeCall(CallSite CS) {
994  ++NumCallsAnalyzed;
995
996  // Track whether the post-inlining function would have more than one basic
997  // block. A single basic block is often intended for inlining. Balloon the
998  // threshold by 50% until we pass the single-BB phase.
999  bool SingleBB = true;
1000  int SingleBBBonus = Threshold / 2;
1001  Threshold += SingleBBBonus;
1002
1003  // Perform some tweaks to the cost and threshold based on the direct
1004  // callsite information.
1005
1006  // We want to more aggressively inline vector-dense kernels, so up the
1007  // threshold, and we'll lower it if the % of vector instructions gets too
1008  // low.
1009  assert(NumInstructions == 0);
1010  assert(NumVectorInstructions == 0);
1011  FiftyPercentVectorBonus = Threshold;
1012  TenPercentVectorBonus = Threshold / 2;
1013
1014  // Give out bonuses per argument, as the instructions setting them up will
1015  // be gone after inlining.
1016  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1017    if (DL && CS.isByValArgument(I)) {
1018      // We approximate the number of loads and stores needed by dividing the
1019      // size of the byval type by the target's pointer size.
1020      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1021      unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType());
1022      unsigned PointerSize = DL->getPointerSizeInBits();
1023      // Ceiling division.
1024      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1025
1026      // If it generates more than 8 stores it is likely to be expanded as an
1027      // inline memcpy so we take that as an upper bound. Otherwise we assume
1028      // one load and one store per word copied.
1029      // FIXME: The maxStoresPerMemcpy setting from the target should be used
1030      // here instead of a magic number of 8, but it's not available via
1031      // DataLayout.
1032      NumStores = std::min(NumStores, 8U);
1033
1034      Cost -= 2 * NumStores * InlineConstants::InstrCost;
1035    } else {
1036      // For non-byval arguments subtract off one instruction per call
1037      // argument.
1038      Cost -= InlineConstants::InstrCost;
1039    }
1040  }
1041
1042  // If there is only one call of the function, and it has internal linkage,
1043  // the cost of inlining it drops dramatically.
1044  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
1045    &F == CS.getCalledFunction();
1046  if (OnlyOneCallAndLocalLinkage)
1047    Cost += InlineConstants::LastCallToStaticBonus;
1048
1049  // If the instruction after the call, or the normal destination of the
1050  // invoke, is an unreachable instruction, the function is noreturn. As such,
1051  // there is little point in inlining this unless there is literally zero
1052  // cost.
1053  Instruction *Instr = CS.getInstruction();
1054  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
1055    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
1056      Threshold = 1;
1057  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
1058    Threshold = 1;
1059
1060  // If this function uses the coldcc calling convention, prefer not to inline
1061  // it.
1062  if (F.getCallingConv() == CallingConv::Cold)
1063    Cost += InlineConstants::ColdccPenalty;
1064
1065  // Check if we're done. This can happen due to bonuses and penalties.
1066  if (Cost > Threshold)
1067    return false;
1068
1069  if (F.empty())
1070    return true;
1071
1072  Function *Caller = CS.getInstruction()->getParent()->getParent();
1073  // Check if the caller function is recursive itself.
1074  for (User *U : Caller->users()) {
1075    CallSite Site(U);
1076    if (!Site)
1077      continue;
1078    Instruction *I = Site.getInstruction();
1079    if (I->getParent()->getParent() == Caller) {
1080      IsCallerRecursive = true;
1081      break;
1082    }
1083  }
1084
1085  // Populate our simplified values by mapping from function arguments to call
1086  // arguments with known important simplifications.
1087  CallSite::arg_iterator CAI = CS.arg_begin();
1088  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1089       FAI != FAE; ++FAI, ++CAI) {
1090    assert(CAI != CS.arg_end());
1091    if (Constant *C = dyn_cast<Constant>(CAI))
1092      SimplifiedValues[FAI] = C;
1093
1094    Value *PtrArg = *CAI;
1095    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1096      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
1097
1098      // We can SROA any pointer arguments derived from alloca instructions.
1099      if (isa<AllocaInst>(PtrArg)) {
1100        SROAArgValues[FAI] = PtrArg;
1101        SROAArgCosts[PtrArg] = 0;
1102      }
1103    }
1104  }
1105  NumConstantArgs = SimplifiedValues.size();
1106  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1107  NumAllocaArgs = SROAArgValues.size();
1108
1109  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1110  // the ephemeral values multiple times (and they're completely determined by
1111  // the callee, so this is purely duplicate work).
1112  SmallPtrSet<const Value *, 32> EphValues;
1113  CodeMetrics::collectEphemeralValues(&F, AT, EphValues);
1114
1115  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1116  // adding basic blocks of the callee which can be proven to be dead for this
1117  // particular call site in order to get more accurate cost estimates. This
1118  // requires a somewhat heavyweight iteration pattern: we need to walk the
1119  // basic blocks in a breadth-first order as we insert live successors. To
1120  // accomplish this, prioritizing for small iterations because we exit after
1121  // crossing our threshold, we use a small-size optimized SetVector.
1122  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1123                                  SmallPtrSet<BasicBlock *, 16> > BBSetVector;
1124  BBSetVector BBWorklist;
1125  BBWorklist.insert(&F.getEntryBlock());
1126  // Note that we *must not* cache the size, this loop grows the worklist.
1127  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1128    // Bail out the moment we cross the threshold. This means we'll under-count
1129    // the cost, but only when undercounting doesn't matter.
1130    if (Cost > (Threshold + VectorBonus))
1131      break;
1132
1133    BasicBlock *BB = BBWorklist[Idx];
1134    if (BB->empty())
1135      continue;
1136
1137    // Disallow inlining a blockaddress. A blockaddress only has defined
1138    // behavior for an indirect branch in the same function, and we do not
1139    // currently support inlining indirect branches. But, the inliner may not
1140    // see an indirect branch that ends up being dead code at a particular call
1141    // site. If the blockaddress escapes the function, e.g., via a global
1142    // variable, inlining may lead to an invalid cross-function reference.
1143    if (BB->hasAddressTaken())
1144      return false;
1145
1146    // Analyze the cost of this block. If we blow through the threshold, this
1147    // returns false, and we can bail on out.
1148    if (!analyzeBlock(BB, EphValues)) {
1149      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
1150          HasIndirectBr)
1151        return false;
1152
1153      // If the caller is a recursive function then we don't want to inline
1154      // functions which allocate a lot of stack space because it would increase
1155      // the caller stack usage dramatically.
1156      if (IsCallerRecursive &&
1157          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
1158        return false;
1159
1160      break;
1161    }
1162
1163    TerminatorInst *TI = BB->getTerminator();
1164
1165    // Add in the live successors by first checking whether we have a terminator
1166    // that may be simplified based on the values simplified by this call.
1167    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1168      if (BI->isConditional()) {
1169        Value *Cond = BI->getCondition();
1170        if (ConstantInt *SimpleCond
1171              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1172          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
1173          continue;
1174        }
1175      }
1176    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1177      Value *Cond = SI->getCondition();
1178      if (ConstantInt *SimpleCond
1179            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1180        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
1181        continue;
1182      }
1183    }
1184
1185    // If we're unable to select a particular successor, just count all of
1186    // them.
1187    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1188         ++TIdx)
1189      BBWorklist.insert(TI->getSuccessor(TIdx));
1190
1191    // If we had any successors at this point, then post-inlining is likely to
1192    // have them as well. Note that we assume any basic blocks which existed
1193    // due to branches or switches which folded above will also fold after
1194    // inlining.
1195    if (SingleBB && TI->getNumSuccessors() > 1) {
1196      // Take off the bonus we applied to the threshold.
1197      Threshold -= SingleBBBonus;
1198      SingleBB = false;
1199    }
1200  }
1201
1202  // If this is a noduplicate call, we can still inline as long as
1203  // inlining this would cause the removal of the caller (so the instruction
1204  // is not actually duplicated, just moved).
1205  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1206    return false;
1207
1208  Threshold += VectorBonus;
1209
1210  return Cost < Threshold;
1211}
1212
1213#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1214/// \brief Dump stats about this call's analysis.
1215void CallAnalyzer::dump() {
1216#define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
1217  DEBUG_PRINT_STAT(NumConstantArgs);
1218  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1219  DEBUG_PRINT_STAT(NumAllocaArgs);
1220  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1221  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1222  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1223  DEBUG_PRINT_STAT(SROACostSavings);
1224  DEBUG_PRINT_STAT(SROACostSavingsLost);
1225  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1226  DEBUG_PRINT_STAT(Cost);
1227  DEBUG_PRINT_STAT(Threshold);
1228  DEBUG_PRINT_STAT(VectorBonus);
1229#undef DEBUG_PRINT_STAT
1230}
1231#endif
1232
1233INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
1234                      true, true)
1235INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
1236INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
1237INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
1238                    true, true)
1239
1240char InlineCostAnalysis::ID = 0;
1241
1242InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}
1243
1244InlineCostAnalysis::~InlineCostAnalysis() {}
1245
1246void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
1247  AU.setPreservesAll();
1248  AU.addRequired<AssumptionTracker>();
1249  AU.addRequired<TargetTransformInfo>();
1250  CallGraphSCCPass::getAnalysisUsage(AU);
1251}
1252
1253bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
1254  TTI = &getAnalysis<TargetTransformInfo>();
1255  AT = &getAnalysis<AssumptionTracker>();
1256  return false;
1257}
1258
1259InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
1260  return getInlineCost(CS, CS.getCalledFunction(), Threshold);
1261}
1262
1263/// \brief Test whether two functions either both have or both lack the given
1264///        attribute.
1265static bool attributeMatches(Function *F1, Function *F2,
1266                             Attribute::AttrKind Attr) {
1267  return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
1268}
1269
1270/// \brief Test that there are no attribute conflicts between Caller and Callee
1271///        that prevent inlining.
1272static bool functionsHaveCompatibleAttributes(Function *Caller,
1273                                              Function *Callee) {
1274  return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
1275         attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
1276         attributeMatches(Caller, Callee, Attribute::SanitizeThread);
1277}
1278
1279InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
1280                                             int Threshold) {
1281  // Cannot inline indirect calls.
1282  if (!Callee)
1283    return llvm::InlineCost::getNever();
1284
1285  // Calls to functions with always-inline attributes should be inlined
1286  // whenever possible.
1287  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
1288    if (isInlineViable(*Callee))
1289      return llvm::InlineCost::getAlways();
1290    return llvm::InlineCost::getNever();
1291  }
1292
1293  // Never inline functions with conflicting attributes (unless callee has
1294  // always-inline attribute).
1295  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
1296    return llvm::InlineCost::getNever();
1297
1298  // Don't inline this call if the caller has the optnone attribute.
1299  if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
1300    return llvm::InlineCost::getNever();
1301
1302  // Don't inline functions which can be redefined at link-time to mean
1303  // something else.  Don't inline functions marked noinline or call sites
1304  // marked noinline.
1305  if (Callee->mayBeOverridden() ||
1306      Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
1307    return llvm::InlineCost::getNever();
1308
1309  DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
1310        << "...\n");
1311
1312  CallAnalyzer CA(Callee->getDataLayout(), *TTI, AT, *Callee, Threshold);
1313  bool ShouldInline = CA.analyzeCall(CS);
1314
1315  DEBUG(CA.dump());
1316
1317  // Check if there was a reason to force inlining or no inlining.
1318  if (!ShouldInline && CA.getCost() < CA.getThreshold())
1319    return InlineCost::getNever();
1320  if (ShouldInline && CA.getCost() >= CA.getThreshold())
1321    return InlineCost::getAlways();
1322
1323  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
1324}
1325
1326bool InlineCostAnalysis::isInlineViable(Function &F) {
1327  bool ReturnsTwice =
1328    F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
1329                                   Attribute::ReturnsTwice);
1330  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
1331    // Disallow inlining of functions which contain indirect branches or
1332    // blockaddresses.
1333    if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
1334      return false;
1335
1336    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
1337         ++II) {
1338      CallSite CS(II);
1339      if (!CS)
1340        continue;
1341
1342      // Disallow recursive calls.
1343      if (&F == CS.getCalledFunction())
1344        return false;
1345
1346      // Disallow calls which expose returns-twice to a function not previously
1347      // attributed as such.
1348      if (!ReturnsTwice && CS.isCall() &&
1349          cast<CallInst>(CS.getInstruction())->canReturnTwice())
1350        return false;
1351    }
1352  }
1353
1354  return true;
1355}
1356