1//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the cost model analysis. It provides a very basic cost
11// estimation for LLVM-IR. This analysis uses the services of the codegen
12// to approximate the cost of any IR instruction when lowered to machine
13// instructions. The cost results are unit-less and the cost number represents
14// the throughput of the machine assuming that all loads hit the cache, all
15// branches are predicted, etc. The cost numbers can be added in order to
16// compare two or more transformation alternatives.
17//
18//===----------------------------------------------------------------------===//
19
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/Analysis/Passes.h"
22#include "llvm/Analysis/TargetTransformInfo.h"
23#include "llvm/IR/Function.h"
24#include "llvm/IR/Instructions.h"
25#include "llvm/IR/IntrinsicInst.h"
26#include "llvm/IR/Value.h"
27#include "llvm/Pass.h"
28#include "llvm/Support/CommandLine.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/raw_ostream.h"
31using namespace llvm;
32
33#define CM_NAME "cost-model"
34#define DEBUG_TYPE CM_NAME
35
36static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
37                                     cl::Hidden,
38                                     cl::desc("Recognize reduction patterns."));
39
40namespace {
41  class CostModelAnalysis : public FunctionPass {
42
43  public:
44    static char ID; // Class identification, replacement for typeinfo
45    CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) {
46      initializeCostModelAnalysisPass(
47        *PassRegistry::getPassRegistry());
48    }
49
50    /// Returns the expected cost of the instruction.
51    /// Returns -1 if the cost is unknown.
52    /// Note, this method does not cache the cost calculation and it
53    /// can be expensive in some cases.
54    unsigned getInstructionCost(const Instruction *I) const;
55
56  private:
57    void getAnalysisUsage(AnalysisUsage &AU) const override;
58    bool runOnFunction(Function &F) override;
59    void print(raw_ostream &OS, const Module*) const override;
60
61    /// The function that we analyze.
62    Function *F;
63    /// Target information.
64    const TargetTransformInfo *TTI;
65  };
66}  // End of anonymous namespace
67
68// Register this pass.
69char CostModelAnalysis::ID = 0;
70static const char cm_name[] = "Cost Model Analysis";
71INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true)
72INITIALIZE_PASS_END  (CostModelAnalysis, CM_NAME, cm_name, false, true)
73
74FunctionPass *llvm::createCostModelAnalysisPass() {
75  return new CostModelAnalysis();
76}
77
78void
79CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
80  AU.setPreservesAll();
81}
82
83bool
84CostModelAnalysis::runOnFunction(Function &F) {
85 this->F = &F;
86 auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
87 TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
88
89 return false;
90}
91
92static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
93  for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
94    if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
95      return false;
96  return true;
97}
98
99static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
100  bool isAlternate = true;
101  unsigned MaskSize = Mask.size();
102
103  // Example: shufflevector A, B, <0,5,2,7>
104  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
105    if (Mask[i] < 0)
106      continue;
107    isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
108  }
109
110  if (isAlternate)
111    return true;
112
113  isAlternate = true;
114  // Example: shufflevector A, B, <4,1,6,3>
115  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
116    if (Mask[i] < 0)
117      continue;
118    isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
119  }
120
121  return isAlternate;
122}
123
124static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
125  TargetTransformInfo::OperandValueKind OpInfo =
126    TargetTransformInfo::OK_AnyValue;
127
128  // Check for a splat of a constant or for a non uniform vector of constants.
129  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
130    OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
131    if (cast<Constant>(V)->getSplatValue() != nullptr)
132      OpInfo = TargetTransformInfo::OK_UniformConstantValue;
133  }
134
135  return OpInfo;
136}
137
138static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
139                                     unsigned Level) {
140  // We don't need a shuffle if we just want to have element 0 in position 0 of
141  // the vector.
142  if (!SI && Level == 0 && IsLeft)
143    return true;
144  else if (!SI)
145    return false;
146
147  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
148
149  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
150  // we look at the left or right side.
151  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
152    Mask[i] = val;
153
154  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
155  return Mask == ActualMask;
156}
157
158static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
159                                          unsigned Level, unsigned NumLevels) {
160  // Match one level of pairwise operations.
161  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
162  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
163  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
164  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
165  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
166  if (BinOp == nullptr)
167    return false;
168
169  assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");
170
171  unsigned Opcode = BinOp->getOpcode();
172  Value *L = BinOp->getOperand(0);
173  Value *R = BinOp->getOperand(1);
174
175  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L);
176  if (!LS && Level)
177    return false;
178  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R);
179  if (!RS && Level)
180    return false;
181
182  // On level 0 we can omit one shufflevector instruction.
183  if (!Level && !RS && !LS)
184    return false;
185
186  // Shuffle inputs must match.
187  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
188  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
189  Value *NextLevelOp = nullptr;
190  if (NextLevelOpR && NextLevelOpL) {
191    // If we have two shuffles their operands must match.
192    if (NextLevelOpL != NextLevelOpR)
193      return false;
194
195    NextLevelOp = NextLevelOpL;
196  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
197    // On the first level we can omit the shufflevector <0, undef,...>. So the
198    // input to the other shufflevector <1, undef> must match with one of the
199    // inputs to the current binary operation.
200    // Example:
201    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
202    //  %BinOp        = fadd          %NextLevelOpL, %R
203    if (NextLevelOpL && NextLevelOpL != R)
204      return false;
205    else if (NextLevelOpR && NextLevelOpR != L)
206      return false;
207
208    NextLevelOp = NextLevelOpL ? R : L;
209  } else
210    return false;
211
212  // Check that the next levels binary operation exists and matches with the
213  // current one.
214  BinaryOperator *NextLevelBinOp = nullptr;
215  if (Level + 1 != NumLevels) {
216    if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
217      return false;
218    else if (NextLevelBinOp->getOpcode() != Opcode)
219      return false;
220  }
221
222  // Shuffle mask for pairwise operation must match.
223  if (matchPairwiseShuffleMask(LS, true, Level)) {
224    if (!matchPairwiseShuffleMask(RS, false, Level))
225      return false;
226  } else if (matchPairwiseShuffleMask(RS, true, Level)) {
227    if (!matchPairwiseShuffleMask(LS, false, Level))
228      return false;
229  } else
230    return false;
231
232  if (++Level == NumLevels)
233    return true;
234
235  // Match next level.
236  return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels);
237}
238
239static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
240                                   unsigned &Opcode, Type *&Ty) {
241  if (!EnableReduxCost)
242    return false;
243
244  // Need to extract the first element.
245  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
246  unsigned Idx = ~0u;
247  if (CI)
248    Idx = CI->getZExtValue();
249  if (Idx != 0)
250    return false;
251
252  BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
253  if (!RdxStart)
254    return false;
255
256  Type *VecTy = ReduxRoot->getOperand(0)->getType();
257  unsigned NumVecElems = VecTy->getVectorNumElements();
258  if (!isPowerOf2_32(NumVecElems))
259    return false;
260
261  // We look for a sequence of shuffle,shuffle,add triples like the following
262  // that builds a pairwise reduction tree.
263  //
264  //  (X0, X1, X2, X3)
265  //   (X0 + X1, X2 + X3, undef, undef)
266  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
267  //
268  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
269  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
270  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
271  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
272  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
273  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
274  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
275  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
276  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
277  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
278  // %r = extractelement <4 x float> %bin.rdx8, i32 0
279  if (!matchPairwiseReductionAtLevel(RdxStart, 0,  Log2_32(NumVecElems)))
280    return false;
281
282  Opcode = RdxStart->getOpcode();
283  Ty = VecTy;
284
285  return true;
286}
287
288static std::pair<Value *, ShuffleVectorInst *>
289getShuffleAndOtherOprd(BinaryOperator *B) {
290
291  Value *L = B->getOperand(0);
292  Value *R = B->getOperand(1);
293  ShuffleVectorInst *S = nullptr;
294
295  if ((S = dyn_cast<ShuffleVectorInst>(L)))
296    return std::make_pair(R, S);
297
298  S = dyn_cast<ShuffleVectorInst>(R);
299  return std::make_pair(L, S);
300}
301
302static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
303                                          unsigned &Opcode, Type *&Ty) {
304  if (!EnableReduxCost)
305    return false;
306
307  // Need to extract the first element.
308  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
309  unsigned Idx = ~0u;
310  if (CI)
311    Idx = CI->getZExtValue();
312  if (Idx != 0)
313    return false;
314
315  BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
316  if (!RdxStart)
317    return false;
318  unsigned RdxOpcode = RdxStart->getOpcode();
319
320  Type *VecTy = ReduxRoot->getOperand(0)->getType();
321  unsigned NumVecElems = VecTy->getVectorNumElements();
322  if (!isPowerOf2_32(NumVecElems))
323    return false;
324
325  // We look for a sequence of shuffles and adds like the following matching one
326  // fadd, shuffle vector pair at a time.
327  //
328  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
329  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
330  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
331  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
332  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
333  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
334  // %r = extractelement <4 x float> %bin.rdx8, i32 0
335
336  unsigned MaskStart = 1;
337  Value *RdxOp = RdxStart;
338  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
339  unsigned NumVecElemsRemain = NumVecElems;
340  while (NumVecElemsRemain - 1) {
341    // Check for the right reduction operation.
342    BinaryOperator *BinOp;
343    if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp)))
344      return false;
345    if (BinOp->getOpcode() != RdxOpcode)
346      return false;
347
348    Value *NextRdxOp;
349    ShuffleVectorInst *Shuffle;
350    std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
351
352    // Check the current reduction operation and the shuffle use the same value.
353    if (Shuffle == nullptr)
354      return false;
355    if (Shuffle->getOperand(0) != NextRdxOp)
356      return false;
357
358    // Check that shuffle masks matches.
359    for (unsigned j = 0; j != MaskStart; ++j)
360      ShuffleMask[j] = MaskStart + j;
361    // Fill the rest of the mask with -1 for undef.
362    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
363
364    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
365    if (ShuffleMask != Mask)
366      return false;
367
368    RdxOp = NextRdxOp;
369    NumVecElemsRemain /= 2;
370    MaskStart *= 2;
371  }
372
373  Opcode = RdxOpcode;
374  Ty = VecTy;
375  return true;
376}
377
378unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
379  if (!TTI)
380    return -1;
381
382  switch (I->getOpcode()) {
383  case Instruction::GetElementPtr:
384    return TTI->getUserCost(I);
385
386  case Instruction::Ret:
387  case Instruction::PHI:
388  case Instruction::Br: {
389    return TTI->getCFInstrCost(I->getOpcode());
390  }
391  case Instruction::Add:
392  case Instruction::FAdd:
393  case Instruction::Sub:
394  case Instruction::FSub:
395  case Instruction::Mul:
396  case Instruction::FMul:
397  case Instruction::UDiv:
398  case Instruction::SDiv:
399  case Instruction::FDiv:
400  case Instruction::URem:
401  case Instruction::SRem:
402  case Instruction::FRem:
403  case Instruction::Shl:
404  case Instruction::LShr:
405  case Instruction::AShr:
406  case Instruction::And:
407  case Instruction::Or:
408  case Instruction::Xor: {
409    TargetTransformInfo::OperandValueKind Op1VK =
410      getOperandInfo(I->getOperand(0));
411    TargetTransformInfo::OperandValueKind Op2VK =
412      getOperandInfo(I->getOperand(1));
413    return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
414                                       Op2VK);
415  }
416  case Instruction::Select: {
417    const SelectInst *SI = cast<SelectInst>(I);
418    Type *CondTy = SI->getCondition()->getType();
419    return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
420  }
421  case Instruction::ICmp:
422  case Instruction::FCmp: {
423    Type *ValTy = I->getOperand(0)->getType();
424    return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
425  }
426  case Instruction::Store: {
427    const StoreInst *SI = cast<StoreInst>(I);
428    Type *ValTy = SI->getValueOperand()->getType();
429    return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
430                                 SI->getAlignment(),
431                                 SI->getPointerAddressSpace());
432  }
433  case Instruction::Load: {
434    const LoadInst *LI = cast<LoadInst>(I);
435    return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
436                                 LI->getAlignment(),
437                                 LI->getPointerAddressSpace());
438  }
439  case Instruction::ZExt:
440  case Instruction::SExt:
441  case Instruction::FPToUI:
442  case Instruction::FPToSI:
443  case Instruction::FPExt:
444  case Instruction::PtrToInt:
445  case Instruction::IntToPtr:
446  case Instruction::SIToFP:
447  case Instruction::UIToFP:
448  case Instruction::Trunc:
449  case Instruction::FPTrunc:
450  case Instruction::BitCast:
451  case Instruction::AddrSpaceCast: {
452    Type *SrcTy = I->getOperand(0)->getType();
453    return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
454  }
455  case Instruction::ExtractElement: {
456    const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
457    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
458    unsigned Idx = -1;
459    if (CI)
460      Idx = CI->getZExtValue();
461
462    // Try to match a reduction sequence (series of shufflevector and vector
463    // adds followed by a extractelement).
464    unsigned ReduxOpCode;
465    Type *ReduxType;
466
467    if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType))
468      return TTI->getReductionCost(ReduxOpCode, ReduxType, false);
469    else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType))
470      return TTI->getReductionCost(ReduxOpCode, ReduxType, true);
471
472    return TTI->getVectorInstrCost(I->getOpcode(),
473                                   EEI->getOperand(0)->getType(), Idx);
474  }
475  case Instruction::InsertElement: {
476    const InsertElementInst * IE = cast<InsertElementInst>(I);
477    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
478    unsigned Idx = -1;
479    if (CI)
480      Idx = CI->getZExtValue();
481    return TTI->getVectorInstrCost(I->getOpcode(),
482                                   IE->getType(), Idx);
483  }
484  case Instruction::ShuffleVector: {
485    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
486    Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
487    unsigned NumVecElems = VecTypOp0->getVectorNumElements();
488    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
489
490    if (NumVecElems == Mask.size()) {
491      if (isReverseVectorMask(Mask))
492        return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
493                                   0, nullptr);
494      if (isAlternateVectorMask(Mask))
495        return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
496                                   VecTypOp0, 0, nullptr);
497    }
498
499    return -1;
500  }
501  case Instruction::Call:
502    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
503      SmallVector<Value *, 4> Args;
504      for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
505        Args.push_back(II->getArgOperand(J));
506
507      FastMathFlags FMF;
508      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
509        FMF = FPMO->getFastMathFlags();
510
511      return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
512                                        Args, FMF);
513    }
514    return -1;
515  default:
516    // We don't have any information on this instruction.
517    return -1;
518  }
519}
520
521void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
522  if (!F)
523    return;
524
525  for (BasicBlock &B : *F) {
526    for (Instruction &Inst : B) {
527      unsigned Cost = getInstructionCost(&Inst);
528      if (Cost != (unsigned)-1)
529        OS << "Cost Model: Found an estimated cost of " << Cost;
530      else
531        OS << "Cost Model: Unknown cost";
532
533      OS << " for instruction: " << Inst << "\n";
534    }
535  }
536}
537