1//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the cost model analysis. It provides a very basic cost 11// estimation for LLVM-IR. This analysis uses the services of the codegen 12// to approximate the cost of any IR instruction when lowered to machine 13// instructions. The cost results are unit-less and the cost number represents 14// the throughput of the machine assuming that all loads hit the cache, all 15// branches are predicted, etc. The cost numbers can be added in order to 16// compare two or more transformation alternatives. 17// 18//===----------------------------------------------------------------------===// 19 20#include "llvm/ADT/STLExtras.h" 21#include "llvm/Analysis/Passes.h" 22#include "llvm/Analysis/TargetTransformInfo.h" 23#include "llvm/IR/Function.h" 24#include "llvm/IR/Instructions.h" 25#include "llvm/IR/IntrinsicInst.h" 26#include "llvm/IR/Value.h" 27#include "llvm/Pass.h" 28#include "llvm/Support/CommandLine.h" 29#include "llvm/Support/Debug.h" 30#include "llvm/Support/raw_ostream.h" 31using namespace llvm; 32 33#define CM_NAME "cost-model" 34#define DEBUG_TYPE CM_NAME 35 36static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), 37 cl::Hidden, 38 cl::desc("Recognize reduction patterns.")); 39 40namespace { 41 class CostModelAnalysis : public FunctionPass { 42 43 public: 44 static char ID; // Class identification, replacement for typeinfo 45 CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) { 46 initializeCostModelAnalysisPass( 47 *PassRegistry::getPassRegistry()); 48 } 49 50 /// Returns the expected cost of the instruction. 51 /// Returns -1 if the cost is unknown. 52 /// Note, this method does not cache the cost calculation and it 53 /// can be expensive in some cases. 54 unsigned getInstructionCost(const Instruction *I) const; 55 56 private: 57 void getAnalysisUsage(AnalysisUsage &AU) const override; 58 bool runOnFunction(Function &F) override; 59 void print(raw_ostream &OS, const Module*) const override; 60 61 /// The function that we analyze. 62 Function *F; 63 /// Target information. 64 const TargetTransformInfo *TTI; 65 }; 66} // End of anonymous namespace 67 68// Register this pass. 69char CostModelAnalysis::ID = 0; 70static const char cm_name[] = "Cost Model Analysis"; 71INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true) 72INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true) 73 74FunctionPass *llvm::createCostModelAnalysisPass() { 75 return new CostModelAnalysis(); 76} 77 78void 79CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 80 AU.setPreservesAll(); 81} 82 83bool 84CostModelAnalysis::runOnFunction(Function &F) { 85 this->F = &F; 86 TTI = getAnalysisIfAvailable<TargetTransformInfo>(); 87 88 return false; 89} 90 91static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) { 92 for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) 93 if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) 94 return false; 95 return true; 96} 97 98static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) { 99 bool isAlternate = true; 100 unsigned MaskSize = Mask.size(); 101 102 // Example: shufflevector A, B, <0,5,2,7> 103 for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { 104 if (Mask[i] < 0) 105 continue; 106 isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); 107 } 108 109 if (isAlternate) 110 return true; 111 112 isAlternate = true; 113 // Example: shufflevector A, B, <4,1,6,3> 114 for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { 115 if (Mask[i] < 0) 116 continue; 117 isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); 118 } 119 120 return isAlternate; 121} 122 123static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { 124 TargetTransformInfo::OperandValueKind OpInfo = 125 TargetTransformInfo::OK_AnyValue; 126 127 // Check for a splat of a constant or for a non uniform vector of constants. 128 if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { 129 OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; 130 if (cast<Constant>(V)->getSplatValue() != nullptr) 131 OpInfo = TargetTransformInfo::OK_UniformConstantValue; 132 } 133 134 return OpInfo; 135} 136 137static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, 138 unsigned Level) { 139 // We don't need a shuffle if we just want to have element 0 in position 0 of 140 // the vector. 141 if (!SI && Level == 0 && IsLeft) 142 return true; 143 else if (!SI) 144 return false; 145 146 SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); 147 148 // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether 149 // we look at the left or right side. 150 for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) 151 Mask[i] = val; 152 153 SmallVector<int, 16> ActualMask = SI->getShuffleMask(); 154 if (Mask != ActualMask) 155 return false; 156 157 return true; 158} 159 160static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, 161 unsigned Level, unsigned NumLevels) { 162 // Match one level of pairwise operations. 163 // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, 164 // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> 165 // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, 166 // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> 167 // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 168 if (BinOp == nullptr) 169 return false; 170 171 assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); 172 173 unsigned Opcode = BinOp->getOpcode(); 174 Value *L = BinOp->getOperand(0); 175 Value *R = BinOp->getOperand(1); 176 177 ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L); 178 if (!LS && Level) 179 return false; 180 ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R); 181 if (!RS && Level) 182 return false; 183 184 // On level 0 we can omit one shufflevector instruction. 185 if (!Level && !RS && !LS) 186 return false; 187 188 // Shuffle inputs must match. 189 Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; 190 Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; 191 Value *NextLevelOp = nullptr; 192 if (NextLevelOpR && NextLevelOpL) { 193 // If we have two shuffles their operands must match. 194 if (NextLevelOpL != NextLevelOpR) 195 return false; 196 197 NextLevelOp = NextLevelOpL; 198 } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { 199 // On the first level we can omit the shufflevector <0, undef,...>. So the 200 // input to the other shufflevector <1, undef> must match with one of the 201 // inputs to the current binary operation. 202 // Example: 203 // %NextLevelOpL = shufflevector %R, <1, undef ...> 204 // %BinOp = fadd %NextLevelOpL, %R 205 if (NextLevelOpL && NextLevelOpL != R) 206 return false; 207 else if (NextLevelOpR && NextLevelOpR != L) 208 return false; 209 210 NextLevelOp = NextLevelOpL ? R : L; 211 } else 212 return false; 213 214 // Check that the next levels binary operation exists and matches with the 215 // current one. 216 BinaryOperator *NextLevelBinOp = nullptr; 217 if (Level + 1 != NumLevels) { 218 if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) 219 return false; 220 else if (NextLevelBinOp->getOpcode() != Opcode) 221 return false; 222 } 223 224 // Shuffle mask for pairwise operation must match. 225 if (matchPairwiseShuffleMask(LS, true, Level)) { 226 if (!matchPairwiseShuffleMask(RS, false, Level)) 227 return false; 228 } else if (matchPairwiseShuffleMask(RS, true, Level)) { 229 if (!matchPairwiseShuffleMask(LS, false, Level)) 230 return false; 231 } else 232 return false; 233 234 if (++Level == NumLevels) 235 return true; 236 237 // Match next level. 238 return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels); 239} 240 241static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot, 242 unsigned &Opcode, Type *&Ty) { 243 if (!EnableReduxCost) 244 return false; 245 246 // Need to extract the first element. 247 ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); 248 unsigned Idx = ~0u; 249 if (CI) 250 Idx = CI->getZExtValue(); 251 if (Idx != 0) 252 return false; 253 254 BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); 255 if (!RdxStart) 256 return false; 257 258 Type *VecTy = ReduxRoot->getOperand(0)->getType(); 259 unsigned NumVecElems = VecTy->getVectorNumElements(); 260 if (!isPowerOf2_32(NumVecElems)) 261 return false; 262 263 // We look for a sequence of shuffle,shuffle,add triples like the following 264 // that builds a pairwise reduction tree. 265 // 266 // (X0, X1, X2, X3) 267 // (X0 + X1, X2 + X3, undef, undef) 268 // ((X0 + X1) + (X2 + X3), undef, undef, undef) 269 // 270 // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, 271 // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> 272 // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, 273 // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> 274 // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 275 // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, 276 // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> 277 // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, 278 // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 279 // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 280 // %r = extractelement <4 x float> %bin.rdx8, i32 0 281 if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems))) 282 return false; 283 284 Opcode = RdxStart->getOpcode(); 285 Ty = VecTy; 286 287 return true; 288} 289 290static std::pair<Value *, ShuffleVectorInst *> 291getShuffleAndOtherOprd(BinaryOperator *B) { 292 293 Value *L = B->getOperand(0); 294 Value *R = B->getOperand(1); 295 ShuffleVectorInst *S = nullptr; 296 297 if ((S = dyn_cast<ShuffleVectorInst>(L))) 298 return std::make_pair(R, S); 299 300 S = dyn_cast<ShuffleVectorInst>(R); 301 return std::make_pair(L, S); 302} 303 304static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, 305 unsigned &Opcode, Type *&Ty) { 306 if (!EnableReduxCost) 307 return false; 308 309 // Need to extract the first element. 310 ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); 311 unsigned Idx = ~0u; 312 if (CI) 313 Idx = CI->getZExtValue(); 314 if (Idx != 0) 315 return false; 316 317 BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); 318 if (!RdxStart) 319 return false; 320 unsigned RdxOpcode = RdxStart->getOpcode(); 321 322 Type *VecTy = ReduxRoot->getOperand(0)->getType(); 323 unsigned NumVecElems = VecTy->getVectorNumElements(); 324 if (!isPowerOf2_32(NumVecElems)) 325 return false; 326 327 // We look for a sequence of shuffles and adds like the following matching one 328 // fadd, shuffle vector pair at a time. 329 // 330 // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, 331 // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 332 // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf 333 // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, 334 // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 335 // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 336 // %r = extractelement <4 x float> %bin.rdx8, i32 0 337 338 unsigned MaskStart = 1; 339 Value *RdxOp = RdxStart; 340 SmallVector<int, 32> ShuffleMask(NumVecElems, 0); 341 unsigned NumVecElemsRemain = NumVecElems; 342 while (NumVecElemsRemain - 1) { 343 // Check for the right reduction operation. 344 BinaryOperator *BinOp; 345 if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp))) 346 return false; 347 if (BinOp->getOpcode() != RdxOpcode) 348 return false; 349 350 Value *NextRdxOp; 351 ShuffleVectorInst *Shuffle; 352 std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); 353 354 // Check the current reduction operation and the shuffle use the same value. 355 if (Shuffle == nullptr) 356 return false; 357 if (Shuffle->getOperand(0) != NextRdxOp) 358 return false; 359 360 // Check that shuffle masks matches. 361 for (unsigned j = 0; j != MaskStart; ++j) 362 ShuffleMask[j] = MaskStart + j; 363 // Fill the rest of the mask with -1 for undef. 364 std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); 365 366 SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); 367 if (ShuffleMask != Mask) 368 return false; 369 370 RdxOp = NextRdxOp; 371 NumVecElemsRemain /= 2; 372 MaskStart *= 2; 373 } 374 375 Opcode = RdxOpcode; 376 Ty = VecTy; 377 return true; 378} 379 380unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { 381 if (!TTI) 382 return -1; 383 384 switch (I->getOpcode()) { 385 case Instruction::GetElementPtr:{ 386 Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); 387 return TTI->getAddressComputationCost(ValTy); 388 } 389 390 case Instruction::Ret: 391 case Instruction::PHI: 392 case Instruction::Br: { 393 return TTI->getCFInstrCost(I->getOpcode()); 394 } 395 case Instruction::Add: 396 case Instruction::FAdd: 397 case Instruction::Sub: 398 case Instruction::FSub: 399 case Instruction::Mul: 400 case Instruction::FMul: 401 case Instruction::UDiv: 402 case Instruction::SDiv: 403 case Instruction::FDiv: 404 case Instruction::URem: 405 case Instruction::SRem: 406 case Instruction::FRem: 407 case Instruction::Shl: 408 case Instruction::LShr: 409 case Instruction::AShr: 410 case Instruction::And: 411 case Instruction::Or: 412 case Instruction::Xor: { 413 TargetTransformInfo::OperandValueKind Op1VK = 414 getOperandInfo(I->getOperand(0)); 415 TargetTransformInfo::OperandValueKind Op2VK = 416 getOperandInfo(I->getOperand(1)); 417 return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, 418 Op2VK); 419 } 420 case Instruction::Select: { 421 const SelectInst *SI = cast<SelectInst>(I); 422 Type *CondTy = SI->getCondition()->getType(); 423 return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); 424 } 425 case Instruction::ICmp: 426 case Instruction::FCmp: { 427 Type *ValTy = I->getOperand(0)->getType(); 428 return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); 429 } 430 case Instruction::Store: { 431 const StoreInst *SI = cast<StoreInst>(I); 432 Type *ValTy = SI->getValueOperand()->getType(); 433 return TTI->getMemoryOpCost(I->getOpcode(), ValTy, 434 SI->getAlignment(), 435 SI->getPointerAddressSpace()); 436 } 437 case Instruction::Load: { 438 const LoadInst *LI = cast<LoadInst>(I); 439 return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), 440 LI->getAlignment(), 441 LI->getPointerAddressSpace()); 442 } 443 case Instruction::ZExt: 444 case Instruction::SExt: 445 case Instruction::FPToUI: 446 case Instruction::FPToSI: 447 case Instruction::FPExt: 448 case Instruction::PtrToInt: 449 case Instruction::IntToPtr: 450 case Instruction::SIToFP: 451 case Instruction::UIToFP: 452 case Instruction::Trunc: 453 case Instruction::FPTrunc: 454 case Instruction::BitCast: 455 case Instruction::AddrSpaceCast: { 456 Type *SrcTy = I->getOperand(0)->getType(); 457 return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); 458 } 459 case Instruction::ExtractElement: { 460 const ExtractElementInst * EEI = cast<ExtractElementInst>(I); 461 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); 462 unsigned Idx = -1; 463 if (CI) 464 Idx = CI->getZExtValue(); 465 466 // Try to match a reduction sequence (series of shufflevector and vector 467 // adds followed by a extractelement). 468 unsigned ReduxOpCode; 469 Type *ReduxType; 470 471 if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) 472 return TTI->getReductionCost(ReduxOpCode, ReduxType, false); 473 else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) 474 return TTI->getReductionCost(ReduxOpCode, ReduxType, true); 475 476 return TTI->getVectorInstrCost(I->getOpcode(), 477 EEI->getOperand(0)->getType(), Idx); 478 } 479 case Instruction::InsertElement: { 480 const InsertElementInst * IE = cast<InsertElementInst>(I); 481 ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); 482 unsigned Idx = -1; 483 if (CI) 484 Idx = CI->getZExtValue(); 485 return TTI->getVectorInstrCost(I->getOpcode(), 486 IE->getType(), Idx); 487 } 488 case Instruction::ShuffleVector: { 489 const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); 490 Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); 491 unsigned NumVecElems = VecTypOp0->getVectorNumElements(); 492 SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); 493 494 if (NumVecElems == Mask.size()) { 495 if (isReverseVectorMask(Mask)) 496 return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 497 0, nullptr); 498 if (isAlternateVectorMask(Mask)) 499 return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, 500 VecTypOp0, 0, nullptr); 501 } 502 503 return -1; 504 } 505 case Instruction::Call: 506 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 507 SmallVector<Type*, 4> Tys; 508 for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) 509 Tys.push_back(II->getArgOperand(J)->getType()); 510 511 return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), 512 Tys); 513 } 514 return -1; 515 default: 516 // We don't have any information on this instruction. 517 return -1; 518 } 519} 520 521void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { 522 if (!F) 523 return; 524 525 for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) { 526 for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) { 527 Instruction *Inst = it; 528 unsigned Cost = getInstructionCost(Inst); 529 if (Cost != (unsigned)-1) 530 OS << "Cost Model: Found an estimated cost of " << Cost; 531 else 532 OS << "Cost Model: Unknown cost"; 533 534 OS << " for instruction: "<< *Inst << "\n"; 535 } 536 } 537} 538