//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
1557e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard// 1657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard//===----------------------------------------------------------------------===// 1757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 1857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard#include "AMDGPU.h" 1957e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard#include "AMDGPUTargetMachine.h" 2036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Analysis/LoopInfo.h" 2157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard#include "llvm/Analysis/TargetTransformInfo.h" 2236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Analysis/ValueTracking.h" 2357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard#include "llvm/Support/Debug.h" 2457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard#include "llvm/Target/CostTable.h" 2536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Target/TargetLowering.h" 2657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardusing namespace llvm; 2757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 28dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "AMDGPUtti" 29dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 3057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard// Declare the pass initialization routine locally as target-specific passes 3157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard// don't have a target-wide initialization entry point, and so we rely on the 3257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard// pass constructor initialization. 
3357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardnamespace llvm { 3457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardvoid initializeAMDGPUTTIPass(PassRegistry &); 3557e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard} 3657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 3757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardnamespace { 3857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 3936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesclass AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo { 4057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard const AMDGPUTargetMachine *TM; 4157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard const AMDGPUSubtarget *ST; 4257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard const AMDGPUTargetLowering *TLI; 4357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 4457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard /// Estimate the overhead of scalarizing an instruction. Insert and Extract 4557e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard /// are set if the result needs to be inserted and/or extracted from vectors. 
4657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 4757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 4857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardpublic: 49dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { 5057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard llvm_unreachable("This pass cannot be directly constructed"); 5157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard } 5257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 5357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard AMDGPUTTI(const AMDGPUTargetMachine *TM) 5457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), 5557e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard TLI(TM->getTargetLowering()) { 5657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry()); 5757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard } 5857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 59dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void initializePass() override { pushTTIStack(this); } 6057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 61dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void getAnalysisUsage(AnalysisUsage &AU) const override { 6257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard TargetTransformInfo::getAnalysisUsage(AU); 6357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard } 6457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 6557e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard /// Pass identification. 6657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard static char ID; 6757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 6857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard /// Provide necessary pointer adjustments for the two base classes. 
69dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void *getAdjustedAnalysisPointer(const void *ID) override { 7057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard if (ID == &TargetTransformInfo::ID) 7157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard return (TargetTransformInfo *)this; 7257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard return this; 7357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard } 7457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 75dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool hasBranchDivergence() const override; 7636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void getUnrollingPreferences(Loop *L, 78dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines UnrollingPreferences &UP) const override; 7957e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 8057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard /// @} 8157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard}; 8257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 8357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard} // end anonymous namespace 8457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 8557e6b2d1f3de0bf459e96f7038e692d624f7e580Tom StellardINITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti", 8657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard "AMDGPU Target Transform Info", true, true, false) 8757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardchar AMDGPUTTI::ID = 0; 8857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 8957e6b2d1f3de0bf459e96f7038e692d624f7e580Tom StellardImmutablePass * 9057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardllvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) { 9157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard return new AMDGPUTTI(TM); 9257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard} 9357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard 9457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardbool 
AMDGPUTTI::hasBranchDivergence() const { return true; } 9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid AMDGPUTTI::getUnrollingPreferences(Loop *L, 9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines UnrollingPreferences &UP) const { 9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); 9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BI != BE; ++BI) { 10036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BasicBlock *BB = *BI; 10136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); 10236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines I != E; ++I) { 10336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I); 10436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!GEP) 10536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 10636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const Value *Ptr = GEP->getPointerOperand(); 10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr)); 10836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Alloca) { 10936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // We want to do whatever we can to limit the number of alloca 11036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // instructions that make it through to the code generator. allocas 11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // require us to use indirect addressing, which is slow and prone to 11236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // compiler bugs. 
If this loop does an address calculation on an 11336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // alloca ptr, then we want to use a higher than normal loop unroll 114dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // threshold. This will give SROA a better chance to eliminate these 115dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // allocas. 116dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // 117dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Don't use the maximum allowed value here as it will make some 118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // programs way too big. 11936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines UP.Threshold = 500; 12036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 12136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 12236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 12336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 124