//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//
1757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
1857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard#include "AMDGPU.h"
1957e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard#include "AMDGPUTargetMachine.h"
2036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Analysis/LoopInfo.h"
2157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard#include "llvm/Analysis/TargetTransformInfo.h"
2236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Analysis/ValueTracking.h"
2357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard#include "llvm/Support/Debug.h"
2457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard#include "llvm/Target/CostTable.h"
2536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Target/TargetLowering.h"
2657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardusing namespace llvm;
2757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
28dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "AMDGPUtti"
29dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
3057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard// Declare the pass initialization routine locally as target-specific passes
3157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard// don't have a target-wide initialization entry point, and so we rely on the
3257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard// pass constructor initialization.
3357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardnamespace llvm {
3457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardvoid initializeAMDGPUTTIPass(PassRegistry &);
3557e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard}
3657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
3757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardnamespace {
3857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
3936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesclass AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo {
4057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  const AMDGPUTargetMachine *TM;
4157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  const AMDGPUSubtarget *ST;
4257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  const AMDGPUTargetLowering *TLI;
4357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
4457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
4557e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  /// are set if the result needs to be inserted and/or extracted from vectors.
4657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
4757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
4857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardpublic:
49dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
5057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard    llvm_unreachable("This pass cannot be directly constructed");
5157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  }
5257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
5357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  AMDGPUTTI(const AMDGPUTargetMachine *TM)
5457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
5557e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard        TLI(TM->getTargetLowering()) {
5657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard    initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
5757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  }
5857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
59dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void initializePass() override { pushTTIStack(this); }
6057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
61dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void getAnalysisUsage(AnalysisUsage &AU) const override {
6257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard    TargetTransformInfo::getAnalysisUsage(AU);
6357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  }
6457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
6557e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  /// Pass identification.
6657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  static char ID;
6757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
6857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  /// Provide necessary pointer adjustments for the two base classes.
69dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void *getAdjustedAnalysisPointer(const void *ID) override {
7057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard    if (ID == &TargetTransformInfo::ID)
7157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard      return (TargetTransformInfo *)this;
7257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard    return this;
7357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  }
7457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
75dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool hasBranchDivergence() const override;
7636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void getUnrollingPreferences(Loop *L,
78dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                               UnrollingPreferences &UP) const override;
7957e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
8057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  /// @}
8157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard};
8257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
8357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard} // end anonymous namespace
8457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
8557e6b2d1f3de0bf459e96f7038e692d624f7e580Tom StellardINITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti",
8657e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard                   "AMDGPU Target Transform Info", true, true, false)
8757e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardchar AMDGPUTTI::ID = 0;
8857e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
8957e6b2d1f3de0bf459e96f7038e692d624f7e580Tom StellardImmutablePass *
9057e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardllvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
9157e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard  return new AMDGPUTTI(TM);
9257e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard}
9357e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellard
9457e6b2d1f3de0bf459e96f7038e692d624f7e580Tom Stellardbool AMDGPUTTI::hasBranchDivergence() const { return true; }
9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid AMDGPUTTI::getUnrollingPreferences(Loop *L,
9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                        UnrollingPreferences &UP) const {
9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                                  BI != BE; ++BI) {
10036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    BasicBlock *BB = *BI;
10136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
10236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                                      I != E; ++I) {
10336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
10436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (!GEP)
10536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        continue;
10636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      const Value *Ptr = GEP->getPointerOperand();
10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
10836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (Alloca) {
10936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // We want to do whatever we can to limit the number of alloca
11036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // instructions that make it through to the code generator.  allocas
11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // require us to use indirect addressing, which is slow and prone to
11236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // compiler bugs.  If this loop does an address calculation on an
11336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // alloca ptr, then we want to use a higher than normal loop unroll
114dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // threshold. This will give SROA a better chance to eliminate these
115dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // allocas.
116dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        //
117dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // Don't use the maximum allowed value here as it will make some
118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // programs way too big.
11936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        UP.Threshold = 500;
12036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
12136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
12236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
12336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
124