16bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// 26bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem// 36bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem// The LLVM Compiler Infrastructure 46bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem// 56bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem// This file is distributed under the University of Illinois Open Source 66bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem// License. See LICENSE.TXT for details. 76bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem// 86bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem//===----------------------------------------------------------------------===// 96bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem// 106bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem// This file defines the cost model analysis. It provides a very basic cost 1199b7a993762a926f042e7cb582e45c577ede596dNadav Rotem// estimation for LLVM-IR. This analysis uses the services of the codegen 1299b7a993762a926f042e7cb582e45c577ede596dNadav Rotem// to approximate the cost of any IR instruction when lowered to machine 1399b7a993762a926f042e7cb582e45c577ede596dNadav Rotem// instructions. The cost results are unit-less and the cost number represents 1499b7a993762a926f042e7cb582e45c577ede596dNadav Rotem// the throughput of the machine assuming that all loads hit the cache, all 1599b7a993762a926f042e7cb582e45c577ede596dNadav Rotem// branches are predicted, etc. The cost numbers can be added in order to 1699b7a993762a926f042e7cb582e45c577ede596dNadav Rotem// compare two or more transformation alternatives. 176bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem// 186bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem//===----------------------------------------------------------------------===// 196bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 2065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer#include "llvm/ADT/STLExtras.h" 216bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem#include "llvm/Analysis/Passes.h" 22be04929f7fd76a921540e9901f24563e51dc1219Chandler Carruth#include "llvm/Analysis/TargetTransformInfo.h" 230b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Function.h" 240b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Instructions.h" 258611d4449a77ca05e808823bc966573a85da00cbBenjamin Kramer#include "llvm/IR/IntrinsicInst.h" 260b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Value.h" 276bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem#include "llvm/Pass.h" 2865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer#include "llvm/Support/CommandLine.h" 296bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem#include "llvm/Support/Debug.h" 306bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem#include "llvm/Support/raw_ostream.h" 316bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotemusing namespace llvm; 326bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 33dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define CM_NAME "cost-model" 34dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE CM_NAME 35dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 3665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighoferstatic cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), 3765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer cl::Hidden, 3865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer cl::desc("Recognize reduction patterns.")); 3965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 406bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotemnamespace { 416bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem class CostModelAnalysis : public FunctionPass { 426bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 436bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem public: 446bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem static char ID; // Class identification, replacement for typeinfo 45dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) { 466bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem initializeCostModelAnalysisPass( 476bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem *PassRegistry::getPassRegistry()); 486bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem } 496bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 506bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem /// Returns the expected cost of the instruction. 516bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem /// Returns -1 if the cost is unknown. 526bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem /// Note, this method does not cache the cost calculation and it 536bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem /// can be expensive in some cases. 54e70b2680a89e1ffb88594f032da9fef757b487faNadav Rotem unsigned getInstructionCost(const Instruction *I) const; 556bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 566bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem private: 5736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void getAnalysisUsage(AnalysisUsage &AU) const override; 5836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool runOnFunction(Function &F) override; 5936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void print(raw_ostream &OS, const Module*) const override; 606bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 616bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem /// The function that we analyze. 626bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem Function *F; 63194bd71b069abd7b3bd278b1df840f9cc9a4161aChandler Carruth /// Target information. 64194bd71b069abd7b3bd278b1df840f9cc9a4161aChandler Carruth const TargetTransformInfo *TTI; 656bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem }; 666bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem} // End of anonymous namespace 676bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 686bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem// Register this pass. 696bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotemchar CostModelAnalysis::ID = 0; 706bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotemstatic const char cm_name[] = "Cost Model Analysis"; 716bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav RotemINITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true) 726bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav RotemINITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true) 736bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 746bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav RotemFunctionPass *llvm::createCostModelAnalysisPass() { 756bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem return new CostModelAnalysis(); 766bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem} 776bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 786bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotemvoid 796bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav RotemCostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 806bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem AU.setPreservesAll(); 816bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem} 826bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 836bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotembool 846bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav RotemCostModelAnalysis::runOnFunction(Function &F) { 856bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem this->F = &F; 866bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem TTI = getAnalysisIfAvailable<TargetTransformInfo>(); 876bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 886bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem return false; 896bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem} 906bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 919e639e8fd95488cb4c8ef2f7f3a41919acb29ac4Craig Topperstatic bool isReverseVectorMask(SmallVectorImpl<int> &Mask) { 924fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) 934fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) 944fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer return false; 954fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer return true; 964fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer} 974fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer 98cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hinesstatic bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) { 99cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines bool isAlternate = true; 100cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines unsigned MaskSize = Mask.size(); 101cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 102cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Example: shufflevector A, B, <0,5,2,7> 103cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { 104cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (Mask[i] < 0) 105cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines continue; 106cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); 107cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 108cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 109cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (isAlternate) 110cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return true; 111cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 112cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines isAlternate = true; 113cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Example: shufflevector A, B, <4,1,6,3> 114cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { 115cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (Mask[i] < 0) 116cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines continue; 117cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); 118cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 119cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 120cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return isAlternate; 121cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines} 122cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1236bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighoferstatic TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { 1246bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer TargetTransformInfo::OperandValueKind OpInfo = 1256bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer TargetTransformInfo::OK_AnyValue; 1266bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer 12736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Check for a splat of a constant or for a non uniform vector of constants. 12836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { 12936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; 130dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (cast<Constant>(V)->getSplatValue() != nullptr) 1316bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer OpInfo = TargetTransformInfo::OK_UniformConstantValue; 13236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1336bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer 1346bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer return OpInfo; 1356bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer} 1366bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer 13765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighoferstatic bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, 13865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned Level) { 13965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // We don't need a shuffle if we just want to have element 0 in position 0 of 14065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // the vector. 14165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!SI && Level == 0 && IsLeft) 14265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return true; 14365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer else if (!SI) 14465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 14565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 14665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); 14765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 14865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether 14965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // we look at the left or right side. 15065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) 15165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Mask[i] = val; 15265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 15365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer SmallVector<int, 16> ActualMask = SI->getShuffleMask(); 154dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Mask != ActualMask) 15565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 15665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 15765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return true; 15865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer} 15965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 16065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighoferstatic bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, 16165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned Level, unsigned NumLevels) { 16265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Match one level of pairwise operations. 16365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, 16465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> 16565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, 16665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> 16765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 168dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (BinOp == nullptr) 16965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 17065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 1712bc3cd83b34b3f76dfb98940201f35464150fa9fEric Christopher assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); 17265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 17365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned Opcode = BinOp->getOpcode(); 17465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Value *L = BinOp->getOperand(0); 17565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Value *R = BinOp->getOperand(1); 17665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 17765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L); 17865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!LS && Level) 17965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 18065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R); 18165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!RS && Level) 18265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 18365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 18465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // On level 0 we can omit one shufflevector instruction. 18565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!Level && !RS && !LS) 18665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 18765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 18865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Shuffle inputs must match. 189dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; 190dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; 191dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Value *NextLevelOp = nullptr; 19265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (NextLevelOpR && NextLevelOpL) { 19365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // If we have two shuffles their operands must match. 19465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (NextLevelOpL != NextLevelOpR) 19565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 19665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 19765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer NextLevelOp = NextLevelOpL; 19865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { 19965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // On the first level we can omit the shufflevector <0, undef,...>. So the 20065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // input to the other shufflevector <1, undef> must match with one of the 20165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // inputs to the current binary operation. 20265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Example: 20365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %NextLevelOpL = shufflevector %R, <1, undef ...> 20465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %BinOp = fadd %NextLevelOpL, %R 20565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (NextLevelOpL && NextLevelOpL != R) 20665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 20765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer else if (NextLevelOpR && NextLevelOpR != L) 20865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 20965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 21065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer NextLevelOp = NextLevelOpL ? R : L; 21165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer } else 21265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 21365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 21465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Check that the next levels binary operation exists and matches with the 21565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // current one. 216dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BinaryOperator *NextLevelBinOp = nullptr; 21765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (Level + 1 != NumLevels) { 21865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) 21965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 22065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer else if (NextLevelBinOp->getOpcode() != Opcode) 22165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 22265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer } 22365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 22465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Shuffle mask for pairwise operation must match. 22565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (matchPairwiseShuffleMask(LS, true, Level)) { 22665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!matchPairwiseShuffleMask(RS, false, Level)) 22765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 22865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer } else if (matchPairwiseShuffleMask(RS, true, Level)) { 22965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!matchPairwiseShuffleMask(LS, false, Level)) 23065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 23165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer } else 23265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 23365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 23465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (++Level == NumLevels) 23565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return true; 23665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 23765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Match next level. 23865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels); 23965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer} 24065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 24165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighoferstatic bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot, 24265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned &Opcode, Type *&Ty) { 24365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!EnableReduxCost) 24465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 24565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 24665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Need to extract the first element. 24765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); 24865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned Idx = ~0u; 24965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (CI) 25065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Idx = CI->getZExtValue(); 25165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (Idx != 0) 25265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 25365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 25465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); 25565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!RdxStart) 25665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 25765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 25865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Type *VecTy = ReduxRoot->getOperand(0)->getType(); 25965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned NumVecElems = VecTy->getVectorNumElements(); 26065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!isPowerOf2_32(NumVecElems)) 26165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 26265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 26365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // We look for a sequence of shuffle,shuffle,add triples like the following 26465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // that builds a pairwise reduction tree. 26565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // 26665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // (X0, X1, X2, X3) 26765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // (X0 + X1, X2 + X3, undef, undef) 26865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // ((X0 + X1) + (X2 + X3), undef, undef, undef) 26965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // 27065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, 27165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> 27265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, 27365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> 27465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 27565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, 27665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> 27765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, 27865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 27965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 28065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %r = extractelement <4 x float> %bin.rdx8, i32 0 28165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems))) 28265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 28365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 28465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Opcode = RdxStart->getOpcode(); 28565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Ty = VecTy; 28665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 28765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return true; 28865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer} 28965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 29065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighoferstatic std::pair<Value *, ShuffleVectorInst *> 29165457b679ae240c1a37da82c5484dac478c47b6dArnold SchwaighofergetShuffleAndOtherOprd(BinaryOperator *B) { 29265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 29365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Value *L = B->getOperand(0); 29465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Value *R = B->getOperand(1); 295dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ShuffleVectorInst *S = nullptr; 29665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 29765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if ((S = dyn_cast<ShuffleVectorInst>(L))) 29865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return std::make_pair(R, S); 29965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 30065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer S = dyn_cast<ShuffleVectorInst>(R); 30165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return std::make_pair(L, S); 30265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer} 30365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 30465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighoferstatic bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, 30565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned &Opcode, Type *&Ty) { 30665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!EnableReduxCost) 30765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 30865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 30965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Need to extract the first element. 31065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); 31165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned Idx = ~0u; 31265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (CI) 31365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Idx = CI->getZExtValue(); 31465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (Idx != 0) 31565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 31665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 31765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); 31865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!RdxStart) 31965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 32065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned RdxOpcode = RdxStart->getOpcode(); 32165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 32265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Type *VecTy = ReduxRoot->getOperand(0)->getType(); 32365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned NumVecElems = VecTy->getVectorNumElements(); 32465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!isPowerOf2_32(NumVecElems)) 32565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 32665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 32765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // We look for a sequence of shuffles and adds like the following matching one 32865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // fadd, shuffle vector pair at a time. 32965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // 33065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, 33165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 33265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf 33365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, 33465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 33565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 33665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // %r = extractelement <4 x float> %bin.rdx8, i32 0 33765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 33865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned MaskStart = 1; 33965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Value *RdxOp = RdxStart; 34065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer SmallVector<int, 32> ShuffleMask(NumVecElems, 0); 34165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned NumVecElemsRemain = NumVecElems; 34265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer while (NumVecElemsRemain - 1) { 34365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Check for the right reduction operation. 34465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer BinaryOperator *BinOp; 34565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp))) 34665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 34765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (BinOp->getOpcode() != RdxOpcode) 34865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 34965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 35065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Value *NextRdxOp; 35165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer ShuffleVectorInst *Shuffle; 35236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); 35365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 35465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Check the current reduction operation and the shuffle use the same value. 355dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Shuffle == nullptr) 35665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 35765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (Shuffle->getOperand(0) != NextRdxOp) 35865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 35965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 36065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Check that shuffle masks matches. 36165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer for (unsigned j = 0; j != MaskStart; ++j) 36265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer ShuffleMask[j] = MaskStart + j; 36365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Fill the rest of the mask with -1 for undef. 36465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); 36565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 36665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); 367dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (ShuffleMask != Mask) 36865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return false; 36965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 37065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer RdxOp = NextRdxOp; 37165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer NumVecElemsRemain /= 2; 37265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer MaskStart *= 2; 37365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer } 37465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 37565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Opcode = RdxOpcode; 37665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Ty = VecTy; 37765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return true; 37865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer} 37965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 380e70b2680a89e1ffb88594f032da9fef757b487faNadav Rotemunsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { 381194bd71b069abd7b3bd278b1df840f9cc9a4161aChandler Carruth if (!TTI) 3826bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem return -1; 3836bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 3846bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem switch (I->getOpcode()) { 385fb55a8fd7c38aa09d9c243d48a8a72d890f36a3dArnold Schwaighofer case Instruction::GetElementPtr:{ 386fb55a8fd7c38aa09d9c243d48a8a72d890f36a3dArnold Schwaighofer Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); 387fb55a8fd7c38aa09d9c243d48a8a72d890f36a3dArnold Schwaighofer return TTI->getAddressComputationCost(ValTy); 388fb55a8fd7c38aa09d9c243d48a8a72d890f36a3dArnold Schwaighofer } 389fb55a8fd7c38aa09d9c243d48a8a72d890f36a3dArnold Schwaighofer 3906bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Ret: 3916bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::PHI: 3926bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Br: { 393194bd71b069abd7b3bd278b1df840f9cc9a4161aChandler Carruth return TTI->getCFInstrCost(I->getOpcode()); 3946bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem } 3956bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Add: 3966bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::FAdd: 3976bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Sub: 3986bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::FSub: 3996bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Mul: 4006bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::FMul: 4016bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::UDiv: 4026bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::SDiv: 4036bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::FDiv: 4046bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::URem: 4056bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::SRem: 4066bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::FRem: 4076bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Shl: 4086bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::LShr: 4096bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::AShr: 4106bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::And: 4116bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Or: 4126bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Xor: { 4136bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer TargetTransformInfo::OperandValueKind Op1VK = 4146bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer getOperandInfo(I->getOperand(0)); 4156bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer TargetTransformInfo::OperandValueKind Op2VK = 4166bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer getOperandInfo(I->getOperand(1)); 4176bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, 4186bf4f676413b8f7d97aaff289997aab344180957Arnold Schwaighofer Op2VK); 4196bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem } 4206bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Select: { 421e70b2680a89e1ffb88594f032da9fef757b487faNadav Rotem const SelectInst *SI = cast<SelectInst>(I); 4226bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem Type *CondTy = SI->getCondition()->getType(); 423194bd71b069abd7b3bd278b1df840f9cc9a4161aChandler Carruth return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); 4246bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem } 4256bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::ICmp: 4266bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::FCmp: { 4276bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem Type *ValTy = I->getOperand(0)->getType(); 428194bd71b069abd7b3bd278b1df840f9cc9a4161aChandler Carruth return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); 4296bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem } 4306bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Store: { 431e70b2680a89e1ffb88594f032da9fef757b487faNadav Rotem const StoreInst *SI = cast<StoreInst>(I); 4326bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem Type *ValTy = SI->getValueOperand()->getType(); 433194bd71b069abd7b3bd278b1df840f9cc9a4161aChandler Carruth return TTI->getMemoryOpCost(I->getOpcode(), ValTy, 4346bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem SI->getAlignment(), 4356bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem SI->getPointerAddressSpace()); 4366bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem } 4376bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Load: { 438e70b2680a89e1ffb88594f032da9fef757b487faNadav Rotem const LoadInst *LI = cast<LoadInst>(I); 439194bd71b069abd7b3bd278b1df840f9cc9a4161aChandler Carruth return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), 4406bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem LI->getAlignment(), 4416bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem LI->getPointerAddressSpace()); 4426bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem } 4436bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::ZExt: 4446bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::SExt: 4456bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::FPToUI: 4466bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::FPToSI: 4476bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::FPExt: 4486bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::PtrToInt: 4496bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::IntToPtr: 4506bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::SIToFP: 4516bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::UIToFP: 4526bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::Trunc: 4536bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem case Instruction::FPTrunc: 45436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case Instruction::BitCast: 45536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case Instruction::AddrSpaceCast: { 4566bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem Type *SrcTy = I->getOperand(0)->getType(); 457194bd71b069abd7b3bd278b1df840f9cc9a4161aChandler Carruth return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); 4586bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem } 459f1495671605b50a4b0386697fee0b76ebae9d17fNadav Rotem case Instruction::ExtractElement: { 460e70b2680a89e1ffb88594f032da9fef757b487faNadav Rotem const ExtractElementInst * EEI = cast<ExtractElementInst>(I); 461f1495671605b50a4b0386697fee0b76ebae9d17fNadav Rotem ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); 462f1495671605b50a4b0386697fee0b76ebae9d17fNadav Rotem unsigned Idx = -1; 463f1495671605b50a4b0386697fee0b76ebae9d17fNadav Rotem if (CI) 464f1495671605b50a4b0386697fee0b76ebae9d17fNadav Rotem Idx = CI->getZExtValue(); 46565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 46665457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // Try to match a reduction sequence (series of shufflevector and vector 46765457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer // adds followed by a extractelement). 46865457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer unsigned ReduxOpCode; 46965457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer Type *ReduxType; 47065457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 47165457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) 47265457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return TTI->getReductionCost(ReduxOpCode, ReduxType, false); 47365457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) 47465457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer return TTI->getReductionCost(ReduxOpCode, ReduxType, true); 47565457b679ae240c1a37da82c5484dac478c47b6dArnold Schwaighofer 476194bd71b069abd7b3bd278b1df840f9cc9a4161aChandler Carruth return TTI->getVectorInstrCost(I->getOpcode(), 477194bd71b069abd7b3bd278b1df840f9cc9a4161aChandler Carruth EEI->getOperand(0)->getType(), Idx); 478f1495671605b50a4b0386697fee0b76ebae9d17fNadav Rotem } 479f1495671605b50a4b0386697fee0b76ebae9d17fNadav Rotem case Instruction::InsertElement: { 4802ebba647eab05aaf71f6a309f855720ab6f90c7fCraig Topper const InsertElementInst * IE = cast<InsertElementInst>(I); 4812ebba647eab05aaf71f6a309f855720ab6f90c7fCraig Topper ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); 4822ebba647eab05aaf71f6a309f855720ab6f90c7fCraig Topper unsigned Idx = -1; 4832ebba647eab05aaf71f6a309f855720ab6f90c7fCraig Topper if (CI) 4842ebba647eab05aaf71f6a309f855720ab6f90c7fCraig Topper Idx = CI->getZExtValue(); 4852ebba647eab05aaf71f6a309f855720ab6f90c7fCraig Topper return TTI->getVectorInstrCost(I->getOpcode(), 4862ebba647eab05aaf71f6a309f855720ab6f90c7fCraig Topper IE->getType(), Idx); 4872ebba647eab05aaf71f6a309f855720ab6f90c7fCraig Topper } 4884fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer case Instruction::ShuffleVector: { 4894fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); 4904fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); 4914fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer unsigned NumVecElems = VecTypOp0->getVectorNumElements(); 4924fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); 4934fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer 494cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (NumVecElems == Mask.size()) { 495cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (isReverseVectorMask(Mask)) 496cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 497cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 0, nullptr); 498cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (isAlternateVectorMask(Mask)) 499cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, 500cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines VecTypOp0, 0, nullptr); 501cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 502cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 5034fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer return -1; 5044fc6484ee2439a9506d525ca757171e0ecc07744Arnold Schwaighofer } 5058611d4449a77ca05e808823bc966573a85da00cbBenjamin Kramer case Instruction::Call: 5068611d4449a77ca05e808823bc966573a85da00cbBenjamin Kramer if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 5078611d4449a77ca05e808823bc966573a85da00cbBenjamin Kramer SmallVector<Type*, 4> Tys; 5088611d4449a77ca05e808823bc966573a85da00cbBenjamin Kramer for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) 5098611d4449a77ca05e808823bc966573a85da00cbBenjamin Kramer Tys.push_back(II->getArgOperand(J)->getType()); 5108611d4449a77ca05e808823bc966573a85da00cbBenjamin Kramer 5118611d4449a77ca05e808823bc966573a85da00cbBenjamin Kramer return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), 5128611d4449a77ca05e808823bc966573a85da00cbBenjamin Kramer Tys); 5138611d4449a77ca05e808823bc966573a85da00cbBenjamin Kramer } 5148611d4449a77ca05e808823bc966573a85da00cbBenjamin Kramer return -1; 5156bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem default: 5166bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem // We don't have any information on this instruction. 5176bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem return -1; 5186bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem } 5196bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem} 5206bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 5216bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotemvoid CostModelAnalysis::print(raw_ostream &OS, const Module*) const { 5226bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem if (!F) 5236bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem return; 5246bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 5256bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) { 5266bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) { 5276bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem Instruction *Inst = it; 5286bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem unsigned Cost = getInstructionCost(Inst); 5296bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem if (Cost != (unsigned)-1) 5306bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem OS << "Cost Model: Found an estimated cost of " << Cost; 5316bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem else 5326bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem OS << "Cost Model: Unknown cost"; 5336bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem 5346bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem OS << " for instruction: "<< *Inst << "\n"; 5356bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem } 5366bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem } 5376bed58ef240b1e1a1fb41fb867a8ba6e7566e0e9Nadav Rotem} 538