//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/DataTypes.h"
#include <functional>

namespace llvm {

class Function;
class GlobalValue;
class Loop;
class Type;
class User;
class Value;

/// \brief Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  MemIntrinsicInfo()
      : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0),
        NumMemRefs(0), PtrVal(nullptr) {}
  bool ReadMem;
  bool WriteMem;
  /// True only if this memory operation is non-volatile, non-atomic, and
  /// unordered.  (See LoadInst/StoreInst for details on each.)
  bool IsSimple;
  /// Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId;
  int NumMemRefs;
  Value *PtrVal;
};

/// \brief This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// \brief Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// \brief Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// \brief Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// \brief Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs, which
  /// works better with simple integral values. Thus this enum only provides
  /// constants. Also note that the returned costs are signed integers to make
  /// it natural to add, subtract, and test with zero (a common boundary
  /// condition). It is not expected that 2^32 is a realistic cost to be
  /// modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };
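
  // A minimal illustration of how clients combine these constants; the
  // comparison threshold here is hypothetical:
  //   int Cost = 2 * TCC_Basic + TCC_Expensive; // e.g. two adds and a div
  //   bool CheapEnough = Cost < 10 * TCC_Basic;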

  /// \brief Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface,
  /// as analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
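
  // Example queries (sketch; TTI is a TargetTransformInfo object and I32Ty
  // and I64Ty are assumed Type pointers):
  //   int AddCost = TTI.getOperationCost(Instruction::Add, I64Ty);
  //   // Casts must also pass the operand (source) type:
  //   int TruncCost = TTI.getOperationCost(Instruction::Trunc, I32Ty, I64Ty);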

  /// \brief Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) const;
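
  // Sketch of querying the cost of an existing GEP instruction, passing its
  // index operands explicitly (GEP is an assumed GetElementPtrInst *):
  //   SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
  //   int Cost = TTI.getGEPCost(GEP->getSourceElementType(),
  //                             GEP->getPointerOperand(), Indices);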

  /// \brief Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  int getCallCost(FunctionType *FTy, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  int getCallCost(const Function *F, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
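
  // Choosing among the overloads (sketch; Callee is a const Function * and
  // Args is an ArrayRef<const Value *>):
  //   int C1 = TTI.getCallCost(Callee->getFunctionType()); // type only
  //   int C2 = TTI.getCallCost(Callee, /*NumArgs=*/2);     // known arity
  //   int C3 = TTI.getCallCost(Callee, Args);              // known arguments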

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) const;

  /// \brief Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost above, and one significant disadvantage: it can only be
  /// used when the IR construct has already been formed.
  ///
  /// The advantages are that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant-GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U) const;
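
  // A typical accumulation over already-formed IR (illustrative only; BB is
  // an assumed BasicBlock):
  //   int BodyCost = 0;
  //   for (const Instruction &I : BB)
  //     BodyCost += TTI.getUserCost(&I);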

  /// \brief Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// \brief Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
  /// builds the dependency graph, and then runs the reachability algorithm
  /// starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  /// \brief Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop below its
    /// expected dynamic cost while rolled by this percentage, apply a discount
    /// (below) to its unrolled cost.
    unsigned PercentDynamicCostSavedThreshold;
    /// The discount applied to the unrolled cost when the *dynamic* cost
    /// savings of unrolling exceed the \c PercentDynamicCostSavedThreshold.
    unsigned DynamicCostSavingsDiscount;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// The maximum unrolling factor. The unrolling factor may be selected
    /// using the appropriate cost threshold, but may not exceed this number
    /// (set to UINT_MAX to disable). This does not apply in cases where the
    /// loop is being fully unrolled.
    unsigned MaxCount;
    /// The maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply unrolling to any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
  };

  /// \brief Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
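
  // A sketch of a target-side override, where MyTTIImpl is a hypothetical
  // target implementation of the \c Concept API:
  //   void MyTTIImpl::getUnrollingPreferences(
  //       Loop *L, TargetTransformInfo::UnrollingPreferences &UP) {
  //     UP.Partial = true; // tolerate partially unrolled loops
  //     UP.MaxCount = 4;   // but cap the unroll factor at 4
  //   }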

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// \brief Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// \brief Return true if the specified immediate is a legal add immediate,
  /// that is, the target has add instructions which can add a register with
  /// the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// \brief Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// \brief Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0) const;
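
  // Example: ask whether [BaseReg + 4 * IndexReg + 16] is legal for an i32
  // access (sketch; Int32Ty is an assumed Type *):
  //   bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true, /*Scale=*/4);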

  /// \brief Return true if the target supports masked load/store.
  /// AVX2 and AVX-512 targets allow masks for consecutive loads and stores.
  bool isLegalMaskedStore(Type *DataType) const;
  bool isLegalMaskedLoad(Type *DataType) const;

  /// \brief Return true if the target supports masked gather/scatter.
  /// AVX-512 fully supports gather and scatter for vectors with 32- and
  /// 64-bit scalar types.
  bool isLegalMaskedScatter(Type *DataType) const;
  bool isLegalMaskedGather(Type *DataType) const;
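
  // Vectorizers typically guard emission of masked memory operations on
  // these hooks (sketch; VecTy is the candidate vector data type):
  //   bool UseMaskedLoad = TTI.isLegalMaskedLoad(VecTy);
  //   bool UseGather = TTI.isLegalMaskedGather(VecTy);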

  /// \brief Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// \brief Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. on x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// \brief Return true if it is profitable to hoist instructions in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  /// \brief Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// \brief Returns the target's jmp_buf alignment in bytes.
  unsigned getJumpBufAlignment() const;

  /// \brief Returns the target's jmp_buf size in bytes.
  unsigned getJumpBufSize() const;

  /// \brief Return true if switches should be turned into lookup tables for
  /// the target.
  bool shouldBuildLookupTables() const;

  /// \brief Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// \brief Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// \brief Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because vector and scalar floating-point
  /// semantics may differ. For example, ARM NEON v7 SIMD math does not
  /// support IEEE-754 denormal numbers, while depending on the platform,
  /// scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// \brief Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
                                      unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;
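
  // Example use of the optional speed hint (sketch; checks a 128-bit access
  // at byte alignment 1 in the default address space):
  //   bool Fast = false;
  //   bool Legal = TTI.allowsMisalignedMemoryAccesses(128, /*AddressSpace=*/0,
  //                                                   /*Alignment=*/1, &Fast);
  //   bool WorthEmitting = Legal && Fast;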

  /// \brief Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// \brief Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// \brief Return the expected cost of supporting the floating point operation
  /// of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// \brief Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// \brief Return the expected cost of materializing the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;

  /// \brief Return the expected cost for the given integer immediate when
  /// optimizing for size. This differs from the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. The return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
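
  // A constant-hoisting style comparison of these costs (sketch; the APInt
  // value and I64Ty are illustrative assumptions):
  //   APInt Imm(64, 0x12345678ULL);
  //   bool WorthHoisting =
  //       TTI.getIntImmCost(Imm, I64Ty) > TargetTransformInfo::TCC_Basic;
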
  /// @}

  /// \name Vector Target Information
  /// @{

  /// \brief The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,       ///< Broadcast element 0 to all other elements.
    SK_Reverse,         ///< Reverse the order of the vector.
    SK_Alternate,       ///< Choose alternate elements from vector.
    SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector ///< ExtractSubvector. Index indicates start offset.
  };

  /// \brief Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non-uniform constant value.
  };

  /// \brief Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return The number of scalar or vector registers that the target has.
  /// If 'Vector' is true, it returns the number of vector registers. If it is
  /// set to false, it returns the number of scalar registers.
  unsigned getNumberOfRegisters(bool Vector) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The bitwidth of the largest vector type that should be used to
  /// load/store in the given address space.
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// \return How much before a load we should place the prefetch instruction.
  /// This is currently measured in number of instructions.
  unsigned getPrefetchDistance() const;

  /// \return Some HW prefetchers can handle accesses up to a certain constant
  /// stride.  This is the minimum stride in bytes where it makes sense to start
  /// adding SW prefetches.  The default is 1, i.e. prefetch with any stride.
  unsigned getMinPrefetchStride() const;

  /// \return The maximum number of iterations to prefetch ahead.  If the
  /// required number of iterations is more than this number, no prefetching is
  /// performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None) const;
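
  // Example: the cost of a vector udiv whose divisor is known to be a
  // uniform power-of-2 constant (sketch; VecTy is an assumed vector type):
  //   int Cost = TTI.getArithmeticInstrCost(
  //       Instruction::UDiv, VecTy, TargetTransformInfo::OK_AnyValue,
  //       TargetTransformInfo::OK_UniformConstantValue,
  //       TargetTransformInfo::OP_None, TargetTransformInfo::OP_PowerOf2);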

  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds.
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                     Type *SubTp = nullptr) const;

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc.
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;

  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br.
  int getCFInstrCost(unsigned Opcode) const;

  /// \returns The expected cost of compare and select instructions.
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                         Type *CondTy = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace) const;

  /// \return The cost of masked Load and Store instructions.
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const;

  /// \return The cost of a Gather or Scatter operation.
  /// \p Opcode - the kind of memory access, Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of a single element
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code.
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor.
  /// \p Indices is the indices for interleaved load members (an interleaved
  ///    load allows gaps).
  /// \p Alignment is the alignment of the memory operation.
  /// \p AddressSpace is the address space of the pointer.
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) const;
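
  // Example: an interleaved load group with factor 2 where both members are
  // used (sketch; WideVecTy is the type of the whole wide load):
  //   unsigned Indices[] = {0, 1};
  //   int Cost = TTI.getInterleavedMemoryOpCost(Instruction::Load, WideVecTy,
  //                                             /*Factor=*/2, Indices,
  //                                             /*Alignment=*/16,
  //                                             /*AddressSpace=*/0);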

  /// \brief Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
  int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const;

  /// \returns The cost of Intrinsic instructions. Type-based analysis only.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Type *> Tys, FastMathFlags FMF) const;

  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF) const;

  /// \returns The cost of Call instructions.
  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;

  /// \returns The number of pieces into which the provided type must be
  /// split during legalization. Zero is returned when the answer is unknown.
  unsigned getNumberOfParts(Type *Tp) const;

  /// \returns The cost of the address computation. For most targets this can be
  /// merged into the instruction indexing mode. Some targets might want to
  /// distinguish between address computation for memory operations on vector
  /// types and scalar types. Such targets should override this function.
  /// The 'IsComplex' parameter is a hint that the address computation is likely
  /// to involve multiple instructions and as such unlikely to be merged into
  /// the address indexing mode.
  int getAddressComputationCost(Type *Ty, bool IsComplex = false) const;

  /// \returns The cost, if any, of keeping values of the given types alive
  /// over a callsite.
  ///
  /// Some types may require the use of register classes that do not have
  /// any callee-saved registers, so would require a spill and fill.
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;

  /// \returns True if the intrinsic is a supported memory intrinsic.  Info
  /// will contain additional information - whether the intrinsic may read
  /// or write memory, volatility, and the pointer.  Info is undefined
  /// if false is returned.
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
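
  // EarlyCSE-style usage sketch (II is an IntrinsicInst *; ProcessAsLoadFrom
  // is a hypothetical helper): treat a recognized intrinsic like a simple
  // load when the target reports it only reads memory.
  //   MemIntrinsicInfo Info;
  //   if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem &&
  //       !Info.WriteMem && Info.IsSimple)
  //     ProcessAsLoadFrom(Info.PtrVal);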

  /// \returns A value which is the result of the given memory intrinsic.  New
  /// instructions may be created to extract the result from the given intrinsic
  /// memory operation.  Returns nullptr if the target cannot create a result
  /// from the given intrinsic.
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const;

  /// \returns True if the two functions have compatible attributes for inlining
  /// purposes.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// @}

private:
  /// \brief The abstract base class used to type erase specific TTI
  /// implementations.
  class Concept;

  /// \brief The template model for the base class which wraps a concrete
  /// implementation in a type erased interface.
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
};

class TargetTransformInfo::Concept {
public:
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                         ArrayRef<const Value *> Operands) = 0;
  virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
  virtual int getCallCost(const Function *F, int NumArgs) = 0;
  virtual int getCallCost(const Function *F,
                          ArrayRef<const Value *> Arguments) = 0;
  virtual unsigned getInliningThresholdMultiplier() = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<Type *> ParamTys) = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<const Value *> Arguments) = 0;
  virtual int getUserCost(const User *U) = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale,
                                     unsigned AddrSpace) = 0;
  virtual bool isLegalMaskedStore(Type *DataType) = 0;
  virtual bool isLegalMaskedLoad(Type *DataType) = 0;
  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
  virtual bool isLegalMaskedGather(Type *DataType) = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale, unsigned AddrSpace) = 0;
  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  virtual bool isProfitableToHoist(Instruction *I) = 0;
  virtual bool isTypeLegal(Type *Ty) = 0;
  virtual unsigned getJumpBufAlignment() = 0;
  virtual unsigned getJumpBufSize() = 0;
  virtual bool shouldBuildLookupTables() = 0;
  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  virtual bool enableInterleavedAccessVectorization() = 0;
  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
  virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
                                              unsigned AddressSpace,
                                              unsigned Alignment,
                                              bool *Fast) = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
  virtual int getFPOpCost(Type *Ty) = 0;
  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                    Type *Ty) = 0;
  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
  virtual unsigned getRegisterBitWidth(bool Vector) = 0;
  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) = 0;
  virtual unsigned getCacheLineSize() = 0;
  virtual unsigned getPrefetchDistance() = 0;
  virtual unsigned getMinPrefetchStride() = 0;
  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
  virtual unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo) = 0;
  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                             Type *SubTp) = 0;
  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                       VectorType *VecTy, unsigned Index) = 0;
  virtual int getCFInstrCost(unsigned Opcode) = 0;
  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                 Type *CondTy) = 0;
  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                 unsigned Index) = 0;
  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                              unsigned AddressSpace) = 0;
  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                    unsigned Alignment,
                                    unsigned AddressSpace) = 0;
  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                     Value *Ptr, bool VariableMask,
                                     unsigned Alignment) = 0;
  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                         unsigned Factor,
                                         ArrayRef<unsigned> Indices,
                                         unsigned Alignment,
                                         unsigned AddressSpace) = 0;
  virtual int getReductionCost(unsigned Opcode, Type *Ty,
                               bool IsPairwiseForm) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                    ArrayRef<Type *> Tys,
                                    FastMathFlags FMF) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                    ArrayRef<Value *> Args,
                                    FastMathFlags FMF) = 0;
  virtual int getCallInstrCost(Function *F, Type *RetTy,
                               ArrayRef<Type *> Tys) = 0;
  virtual unsigned getNumberOfParts(Type *Tp) = 0;
  virtual int getAddressComputationCost(Type *Ty, bool IsComplex) = 0;
  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) = 0;
  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                   Type *ExpectedType) = 0;
  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const = 0;
};

template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override {}

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    return Impl.getOperationCost(Opcode, Ty, OpTy);
  }
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands);
  }
  int getCallCost(FunctionType *FTy, int NumArgs) override {
    return Impl.getCallCost(FTy, NumArgs);
  }
  int getCallCost(const Function *F, int NumArgs) override {
    return Impl.getCallCost(F, NumArgs);
  }
  int getCallCost(const Function *F,
                  ArrayRef<const Value *> Arguments) override {
    return Impl.getCallCost(F, Arguments);
  }
  unsigned getInliningThresholdMultiplier() override {
    return Impl.getInliningThresholdMultiplier();
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) override {
    return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) override {
    return Impl.getIntrinsicCost(IID, RetTy, Arguments);
  }
  int getUserCost(const User *U) override { return Impl.getUserCost(U); }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {
    return Impl.getUnrollingPreferences(L, UP);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace);
  }
  bool isLegalMaskedStore(Type *DataType) override {
    return Impl.isLegalMaskedStore(DataType);
  }
  bool isLegalMaskedLoad(Type *DataType) override {
    return Impl.isLegalMaskedLoad(DataType);
  }
  bool isLegalMaskedScatter(Type *DataType) override {
    return Impl.isLegalMaskedScatter(DataType);
  }
  bool isLegalMaskedGather(Type *DataType) override {
    return Impl.isLegalMaskedGather(DataType);
  }
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                     Scale, AddrSpace);
  }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
                                      unsigned Alignment, bool *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCost(Imm, Ty);
  }
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(IID, Idx, Imm, Ty);
  }
  unsigned getNumberOfRegisters(bool Vector) override {
    return Impl.getNumberOfRegisters(Vector);
  }
  unsigned getRegisterBitWidth(bool Vector) override {
    return Impl.getRegisterBitWidth(Vector);
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }

  unsigned getCacheLineSize() override {
    return Impl.getCacheLineSize();
  }
  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
  unsigned getMinPrefetchStride() override {
    return Impl.getMinPrefetchStride();
  }
  unsigned getMaxPrefetchIterationsAhead() override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  unsigned getMaxInterleaveFactor(unsigned VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo);
  }
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                     Type *SubTp) override {
    return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
  }
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src);
  }
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  int getCFInstrCost(unsigned Opcode) override {
    return Impl.getCFInstrCost(Opcode);
  }
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy);
  }
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    return Impl.getVectorInstrCost(Opcode, Val, Index);
  }
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  }
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  }
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                             Value *Ptr, bool VariableMask,
                             unsigned Alignment) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment);
  }
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
  }
  int getReductionCost(unsigned Opcode, Type *Ty,
                       bool IsPairwiseForm) override {
    return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
                            FastMathFlags FMF) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args,
                            FastMathFlags FMF) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
  }
  int getCallInstrCost(Function *F, Type *RetTy,
                       ArrayRef<Type *> Tys) override {
    return Impl.getCallInstrCost(F, RetTy, Tys);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  int getAddressComputationCost(Type *Ty, bool IsComplex) override {
    return Impl.getAddressComputationCost(Ty, IsComplex);
  }
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
};

template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}

/// \brief Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  typedef TargetTransformInfo Result;

  /// \brief Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// \brief Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
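
  // Construction sketch, where MyTTIImpl is a hypothetical target
  // implementation and TM is the owning target machine:
  //   TargetIRAnalysis TIRA([&TM](const Function &F) {
  //     return TargetTransformInfo(MyTTIImpl(&TM, F));
  //   });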

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  Result run(const Function &F, AnalysisManager<Function> &);

private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  static char PassID;

  /// \brief The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// \brief Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};

/// \brief Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
  TargetIRAnalysis TIRA;
  Optional<TargetTransformInfo> TTI;

  virtual void anchor();

public:
  static char ID;

  /// \brief We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  TargetTransformInfo &getTTI(const Function &F);
};
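
// A legacy-pass-manager client typically obtains the TTI for a function F
// like this (sketch; assumes the pass declared the dependency in its
// getAnalysisUsage):
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);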

/// \brief Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

} // End llvm namespace

#endif