BasicTargetTransformInfo.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides the implementation of a basic TargetTransformInfo pass
/// predicated on the target abstractions present in the target independent
/// code generator. It uses these (primarily TargetLowering) to model as much
/// of the TTI query interface as possible. It is included by most targets so
/// that they can specialize only a small subset of the query space.
///
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <utility>
using namespace llvm;

static cl::opt<unsigned>
PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
  cl::desc("Threshold for partial unrolling"), cl::Hidden);

#define DEBUG_TYPE "basictti"

namespace {

class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
  const TargetMachine *TM;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the result needs to be inserted and/or extracted from vectors.
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

  const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); }

public:
  BasicTTI() : ImmutablePass(ID), TM(nullptr) {
    llvm_unreachable("This pass cannot be directly constructed");
  }

  BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) {
    initializeBasicTTIPass(*PassRegistry::getPassRegistry());
  }

  void initializePass() override {
    pushTTIStack(this);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    TargetTransformInfo::getAnalysisUsage(AU);
  }

  /// Pass identification.
  static char ID;

  /// Provide necessary pointer adjustments for the two base classes.
  void *getAdjustedAnalysisPointer(const void *ID) override {
    if (ID == &TargetTransformInfo::ID)
      return (TargetTransformInfo*)this;
    return this;
  }

  bool hasBranchDivergence() const override;

  /// \name Scalar TTI Implementations
  /// @{

  bool isLegalAddImmediate(int64_t imm) const override;
  bool isLegalICmpImmediate(int64_t imm) const override;
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                             int64_t BaseOffset, bool HasBaseReg,
                             int64_t Scale) const override;
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                           int64_t BaseOffset, bool HasBaseReg,
                           int64_t Scale) const override;
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTypeLegal(Type *Ty) const override;
  unsigned getJumpBufAlignment() const override;
  unsigned getJumpBufSize() const override;
  bool shouldBuildLookupTables() const override;
  bool haveFastSqrt(Type *Ty) const override;
  void getUnrollingPreferences(Loop *L,
                               UnrollingPreferences &UP) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector) const override;
  unsigned getMaximumUnrollFactor() const override;
  unsigned getRegisterBitWidth(bool Vector) const override;
  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
                                  OperandValueKind) const override;
  unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                          int Index, Type *SubTp) const override;
  unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
                            Type *Src) const override;
  unsigned getCFInstrCost(unsigned Opcode) const override;
  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                              Type *CondTy) const override;
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                              unsigned Index) const override;
  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace) const override;
  unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
                                 ArrayRef<Type*> Tys) const override;
  unsigned getNumberOfParts(Type *Tp) const override;
  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
  unsigned getReductionCost(unsigned Opcode, Type *Ty,
                            bool IsPairwise) const override;

  /// @}
};

}

INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti",
                   "Target independent code generator's TTI", true, true, false)
char BasicTTI::ID = 0;

ImmutablePass *
llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) {
  return new BasicTTI(TM);
}

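// By default, assume uniform control flow; targets whose branches can diverge
// across lanes (such as GPU backends) override this hook.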
bool BasicTTI::hasBranchDivergence() const { return false; }

bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
  return getTLI()->isLegalAddImmediate(imm);
}

bool BasicTTI::isLegalICmpImmediate(int64_t imm) const {
  return getTLI()->isLegalICmpImmediate(imm);
}

bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale) const {
  TargetLoweringBase::AddrMode AM;
  AM.BaseGV = BaseGV;
  AM.BaseOffs = BaseOffset;
  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Scale;
  return getTLI()->isLegalAddressingMode(AM, Ty);
}

int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale) const {
  TargetLoweringBase::AddrMode AM;
  AM.BaseGV = BaseGV;
  AM.BaseOffs = BaseOffset;
  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Scale;
  return getTLI()->getScalingFactorCost(AM, Ty);
}

bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return getTLI()->isTruncateFree(Ty1, Ty2);
}

bool BasicTTI::isTypeLegal(Type *Ty) const {
  EVT T = getTLI()->getValueType(Ty);
  return getTLI()->isTypeLegal(T);
}

unsigned BasicTTI::getJumpBufAlignment() const {
  return getTLI()->getJumpBufAlignment();
}

unsigned BasicTTI::getJumpBufSize() const {
  return getTLI()->getJumpBufSize();
}

bool BasicTTI::shouldBuildLookupTables() const {
  const TargetLoweringBase *TLI = getTLI();
  return TLI->supportJumpTables() &&
      (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
       TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}

bool BasicTTI::haveFastSqrt(Type *Ty) const {
  const TargetLoweringBase *TLI = getTLI();
  EVT VT = TLI->getValueType(Ty);
  return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
}

void BasicTTI::getUnrollingPreferences(Loop *L,
                                       UnrollingPreferences &UP) const {
  // This unrolling functionality is target independent, but to provide some
  // motivation for its intended use, for x86:

  // According to the Intel 64 and IA-32 Architectures Optimization Reference
  // Manual, Intel Core models and later have a loop stream detector
  // (and associated uop queue) that can benefit from partial unrolling.
  // The relevant requirements are:
  //  - The loop must have no more than 4 (8 for Nehalem and later) branches
  //    taken, and none of them may be calls.
  //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

  // According to the Software Optimization Guide for AMD Family 15h
  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
  // and loop buffer which can benefit from partial unrolling.
  // The relevant requirements are:
  //  - The loop must have fewer than 16 branches.
  //  - The loop must have less than 40 uops in all executed loop branches.

  // The number of taken branches in a loop is hard to estimate here, and
  // benchmarking has revealed that it is better not to be conservative when
  // estimating the branch count. As a result, we'll ignore the branch limits
  // until someone finds a case where it matters in practice.

  unsigned MaxOps;
  const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>();
  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
    MaxOps = PartialUnrollingThreshold;
  else if (ST->getSchedModel()->LoopMicroOpBufferSize > 0)
    MaxOps = ST->getSchedModel()->LoopMicroOpBufferSize;
  else
    return;

  // Scan the loop: don't unroll loops with calls.
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
       I != E; ++I) {
    BasicBlock *BB = *I;

    for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
      if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
        ImmutableCallSite CS(J);
        if (const Function *F = CS.getCalledFunction()) {
          if (!TopTTI->isLoweredToCall(F))
            continue;
        }

        return;
      }
  }

  // Enable runtime and partial unrolling up to the specified size.
  UP.Partial = UP.Runtime = true;
  UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
}

//===----------------------------------------------------------------------===//
//
// Calls used by the vectorizers.
//
//===----------------------------------------------------------------------===//

unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert,
                                            bool Extract) const {
  assert(Ty->isVectorTy() && "Can only scalarize vectors");
  unsigned Cost = 0;

  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
    if (Insert)
      Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
    if (Extract)
      Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
  }

  return Cost;
}

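// The register-file and unroll-factor queries below return deliberately
// conservative defaults (a single 32-bit register, no interleaving); targets
// with real vector register files are expected to override them.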
unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
  return 1;
}

unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
  return 32;
}

unsigned BasicTTI::getMaximumUnrollFactor() const {
  return 1;
}

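// The arithmetic cost model is tiered: operations that are legal (or promoted)
// cost roughly one unit per legalized register, custom-lowered operations are
// assumed to be twice as expensive, and operations that must be expanded on
// vector types are scalarized element by element.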
unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                          OperandValueKind,
                                          OperandValueKind) const {
  // Check if any of the operands are vector operands.
  const TargetLoweringBase *TLI = getTLI();
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

  bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
  // Assume that floating point arithmetic operations cost twice as much as
  // integer operations.
  unsigned OpCost = (IsFloat ? 2 : 1);

  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
    // The operation is legal. Assume it costs 1.
    // If the type is split to multiple registers, assume that there is some
    // overhead to this.
    // TODO: Once we have extract/insert subvector cost we need to use them.
    if (LT.first > 1)
      return LT.first * 2 * OpCost;
    return LT.first * 1 * OpCost;
  }

  if (!TLI->isOperationExpand(ISD, LT.second)) {
    // If the operation is custom lowered then assume
    // that the code is twice as expensive.
    return LT.first * 2 * OpCost;
  }

  // Else, assume that we need to scalarize this op.
  if (Ty->isVectorTy()) {
    unsigned Num = Ty->getVectorNumElements();
    unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType());
    // Return the cost of multiple scalar invocations plus the cost of
    // inserting and extracting the values.
    return getScalarizationOverhead(Ty, true, true) + Num * Cost;
  }

  // We don't know anything about this scalar instruction.
  return OpCost;
}

unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                                  Type *SubTp) const {
  return 1;
}

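// Cast costs: conversions that type legalization turns into no-ops (same-size
// bitcasts, free truncates and zero-extends) are free, casts the target lowers
// directly cost one unit per legalized register, illegal scalar casts get a
// flat cost, and illegal vector casts are priced via scalarization.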
unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
                                    Type *Src) const {
  const TargetLoweringBase *TLI = getTLI();
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);

  // Check for NOOP conversions.
  if (SrcLT.first == DstLT.first &&
      SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

      // Bitcasts between types that are legalized to the same type are free.
      if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
        return 0;
  }

  if (Opcode == Instruction::Trunc &&
      TLI->isTruncateFree(SrcLT.second, DstLT.second))
    return 0;

  if (Opcode == Instruction::ZExt &&
      TLI->isZExtFree(SrcLT.second, DstLT.second))
    return 0;

  // If the cast is marked as legal (or promote) then assume low cost.
  if (SrcLT.first == DstLT.first &&
      TLI->isOperationLegalOrPromote(ISD, DstLT.second))
    return 1;

  // Handle scalar conversions.
  if (!Src->isVectorTy() && !Dst->isVectorTy()) {

    // Scalar bitcasts are usually free.
    if (Opcode == Instruction::BitCast)
      return 0;

    // Just check the op cost. If the operation is legal then assume it costs 1.
    if (!TLI->isOperationExpand(ISD, DstLT.second))
      return 1;

    // Assume that illegal scalar instructions are expensive.
    return 4;
  }

  // Check vector-to-vector casts.
  if (Dst->isVectorTy() && Src->isVectorTy()) {

    // If the cast is between same-sized registers, then the check is simple.
    if (SrcLT.first == DstLT.first &&
        SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

      // Assume that Zext is done using AND.
      if (Opcode == Instruction::ZExt)
        return 1;

      // Assume that sext is done using SHL and SRA.
      if (Opcode == Instruction::SExt)
        return 2;

      // Just check the op cost. If the operation is legal then assume it costs
      // 1 and multiply by the type-legalization overhead.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return SrcLT.first * 1;
    }

    // If we are converting vectors and the operation is illegal, or
    // if the vectors are legalized to different types, estimate the
    // scalarization costs.
    unsigned Num = Dst->getVectorNumElements();
    unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(),
                                             Src->getScalarType());

    // Return the cost of multiple scalar invocations plus the cost of
    // inserting and extracting the values.
    return getScalarizationOverhead(Dst, true, true) + Num * Cost;
  }

  // We already handled vector-to-vector and scalar-to-scalar conversions. This
  // is where we handle bitcasts between vectors and scalars. We need to assume
  // that the conversion is scalarized in one way or another.
  if (Opcode == Instruction::BitCast)
    // Illegal bitcasts are done by storing and loading from a stack slot.
    return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
           (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);

  llvm_unreachable("Unhandled cast");
}

unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const {
  // Branches are assumed to be predicted.
  return 0;
}

unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                      Type *CondTy) const {
  const TargetLoweringBase *TLI = getTLI();
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Selects on vectors are actually vector selects.
  if (ISD == ISD::SELECT) {
    assert(CondTy && "CondTy must exist");
    if (CondTy->isVectorTy())
      ISD = ISD::VSELECT;
  }

  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);

  if (!TLI->isOperationExpand(ISD, LT.second)) {
    // The operation is legal. Assume it costs 1. Multiply
    // by the type-legalization overhead.
    return LT.first * 1;
  }

  // Otherwise, assume that the operation is scalarized.
  if (ValTy->isVectorTy()) {
    unsigned Num = ValTy->getVectorNumElements();
    if (CondTy)
      CondTy = CondTy->getScalarType();
    unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
                                               CondTy);

    // Return the cost of multiple scalar invocations plus the cost of
    // inserting and extracting the values.
    return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
  }

  // Unknown scalar opcode.
  return 1;
}

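// Inserting or extracting a vector element is assumed to cost one operation
// per register the scalar element type legalizes into.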
unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
                                      unsigned Index) const {
  std::pair<unsigned, MVT> LT =
      getTLI()->getTypeLegalizationCost(Val->getScalarType());

  return LT.first;
}

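// Loads and stores cost one unit per register the type legalizes into, plus
// scalarization overhead when an illegal extending load or truncating store
// forces element-wise access.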
unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
                                   unsigned Alignment,
                                   unsigned AddressSpace) const {
  assert(!Src->isVoidTy() && "Invalid type");
  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src);

  // Assume that all loads of legal types cost 1.
  unsigned Cost = LT.first;

  if (Src->isVectorTy() &&
      Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
    // This is a vector load/store that legalizes to a larger type than the
    // vector itself. Unless the corresponding extending load or truncating
    // store is legal, it will scalarize.
    TargetLowering::LegalizeAction LA = TargetLowering::Expand;
    EVT MemVT = getTLI()->getValueType(Src, true);
    if (MemVT.isSimple() && MemVT != MVT::Other) {
      if (Opcode == Instruction::Store)
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT());
      else
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT.getSimpleVT());
    }

    if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
      // This is a vector load/store for some illegal type that is scalarized.
      // We must account for the cost of building or decomposing the vector.
      Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
                                            Opcode == Instruction::Store);
    }
  }

  return Cost;
}

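// Intrinsics that map onto a single ISD node reuse the legal/custom/expand
// cost tiers used for arithmetic; intrinsics we know nothing about are assumed
// to scalarize, and math intrinsics that end up as library calls are made
// deliberately expensive.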
unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                         ArrayRef<Type *> Tys) const {
  unsigned ISD = 0;
  switch (IID) {
  default: {
    // Assume that we need to scalarize this intrinsic.
    unsigned ScalarizationCost = 0;
    unsigned ScalarCalls = 1;
    if (RetTy->isVectorTy()) {
      ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
      ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
    }
    for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
      if (Tys[i]->isVectorTy()) {
        ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
        ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
      }
    }

    return ScalarCalls + ScalarizationCost;
  }
  // Look for intrinsics that can be lowered directly or turned into a scalar
  // intrinsic call.
  case Intrinsic::sqrt:    ISD = ISD::FSQRT;  break;
  case Intrinsic::sin:     ISD = ISD::FSIN;   break;
  case Intrinsic::cos:     ISD = ISD::FCOS;   break;
  case Intrinsic::exp:     ISD = ISD::FEXP;   break;
  case Intrinsic::exp2:    ISD = ISD::FEXP2;  break;
  case Intrinsic::log:     ISD = ISD::FLOG;   break;
  case Intrinsic::log10:   ISD = ISD::FLOG10; break;
  case Intrinsic::log2:    ISD = ISD::FLOG2;  break;
  case Intrinsic::fabs:    ISD = ISD::FABS;   break;
  case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break;
  case Intrinsic::floor:   ISD = ISD::FFLOOR; break;
  case Intrinsic::ceil:    ISD = ISD::FCEIL;  break;
  case Intrinsic::trunc:   ISD = ISD::FTRUNC; break;
  case Intrinsic::nearbyint:
                           ISD = ISD::FNEARBYINT; break;
  case Intrinsic::rint:    ISD = ISD::FRINT;  break;
  case Intrinsic::round:   ISD = ISD::FROUND; break;
  case Intrinsic::pow:     ISD = ISD::FPOW;   break;
  case Intrinsic::fma:     ISD = ISD::FMA;    break;
  case Intrinsic::fmuladd: ISD = ISD::FMA;    break;
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
    return 0;
  }

  const TargetLoweringBase *TLI = getTLI();
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);

  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
    // The operation is legal. Assume it costs 1.
    // If the type is split to multiple registers, assume that there is some
    // overhead to this.
    // TODO: Once we have extract/insert subvector cost we need to use them.
    if (LT.first > 1)
      return LT.first * 2;
    return LT.first * 1;
  }

  if (!TLI->isOperationExpand(ISD, LT.second)) {
    // If the operation is custom lowered then assume
    // that the code is twice as expensive.
    return LT.first * 2;
  }

  // If we can't lower fmuladd into an FMA, estimate the cost as a floating
  // point mul followed by an add.
  if (IID == Intrinsic::fmuladd)
    return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
           TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);

  // Else, assume that we need to scalarize this intrinsic. For math builtins
  // this will emit a costly libcall, adding call overhead and spills. Make it
  // very expensive.
  if (RetTy->isVectorTy()) {
    unsigned Num = RetTy->getVectorNumElements();
    unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
                                                  Tys);
    return 10 * Cost * Num;
  }

  // This is going to be turned into a library call, make it expensive.
  return 10;
}

unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp);
  return LT.first;
}

unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
  return 0;
}

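// Reductions are modeled as a log2(NumVecElts)-deep tree: each level costs one
// vector arithmetic op plus one shuffle (two shuffles per level for pairwise
// reductions), plus the scalarization (extract) overhead of the vector type.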
unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty,
                                    bool IsPairwise) const {
  assert(Ty->isVectorTy() && "Expect a vector type");
  unsigned NumVecElts = Ty->getVectorNumElements();
  unsigned NumReduxLevels = Log2_32(NumVecElts);
  unsigned ArithCost = NumReduxLevels *
    TopTTI->getArithmeticInstrCost(Opcode, Ty);
  // Assume the pairwise shuffles add a cost.
  unsigned ShuffleCost =
      NumReduxLevels * (IsPairwise + 1) *
      TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty);
  return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
}