AMDGPUTargetTransformInfo.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// \file 11// This file implements a TargetTransformInfo analysis pass specific to the 12// AMDGPU target machine. It uses the target's detailed information to provide 13// more precise answers to certain TTI queries, while letting the target 14// independent and default TTI implementations handle the rest. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "AMDGPUtti" 19#include "AMDGPU.h" 20#include "AMDGPUTargetMachine.h" 21#include "llvm/Analysis/LoopInfo.h" 22#include "llvm/Analysis/TargetTransformInfo.h" 23#include "llvm/Analysis/ValueTracking.h" 24#include "llvm/Support/Debug.h" 25#include "llvm/Target/CostTable.h" 26#include "llvm/Target/TargetLowering.h" 27using namespace llvm; 28 29// Declare the pass initialization routine locally as target-specific passes 30// don't have a target-wide initialization entry point, and so we rely on the 31// pass constructor initialization. 32namespace llvm { 33void initializeAMDGPUTTIPass(PassRegistry &); 34} 35 36namespace { 37 38class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo { 39 const AMDGPUTargetMachine *TM; 40 const AMDGPUSubtarget *ST; 41 const AMDGPUTargetLowering *TLI; 42 43 /// Estimate the overhead of scalarizing an instruction. Insert and Extract 44 /// are set if the result needs to be inserted and/or extracted from vectors. 45 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 46 47public: 48 AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { 49 llvm_unreachable("This pass cannot be directly constructed"); 50 } 51 52 AMDGPUTTI(const AMDGPUTargetMachine *TM) 53 : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), 54 TLI(TM->getTargetLowering()) { 55 initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry()); 56 } 57 58 virtual void initializePass() override { pushTTIStack(this); } 59 60 virtual void getAnalysisUsage(AnalysisUsage &AU) const override { 61 TargetTransformInfo::getAnalysisUsage(AU); 62 } 63 64 /// Pass identification. 65 static char ID; 66 67 /// Provide necessary pointer adjustments for the two base classes. 68 virtual void *getAdjustedAnalysisPointer(const void *ID) override { 69 if (ID == &TargetTransformInfo::ID) 70 return (TargetTransformInfo *)this; 71 return this; 72 } 73 74 virtual bool hasBranchDivergence() const override; 75 76 virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; 77 78 /// @} 79}; 80 81} // end anonymous namespace 82 83INITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti", 84 "AMDGPU Target Transform Info", true, true, false) 85char AMDGPUTTI::ID = 0; 86 87ImmutablePass * 88llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) { 89 return new AMDGPUTTI(TM); 90} 91 92bool AMDGPUTTI::hasBranchDivergence() const { return true; } 93 94void AMDGPUTTI::getUnrollingPreferences(Loop *L, 95 UnrollingPreferences &UP) const { 96 for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); 97 BI != BE; ++BI) { 98 BasicBlock *BB = *BI; 99 for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); 100 I != E; ++I) { 101 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I); 102 if (!GEP) 103 continue; 104 const Value *Ptr = GEP->getPointerOperand(); 105 const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr)); 106 if (Alloca) { 107 // We want to do whatever we can to limit the number of alloca 108 // instructions that make it through to the code generator. allocas 109 // require us to use indirect addressing, which is slow and prone to 110 // compiler bugs. If this loop does an address calculation on an 111 // alloca ptr, then we want to use a higher than normal loop unroll 112 // threshold. This will give SROA a better chance to eliminate these 113 // allocas. 114 // 115 // Don't use the maximum allowed value here as it will make some 116 // programs way too big. 117 UP.Threshold = 500; 118 } 119 } 120 } 121} 122