AMDGPUTargetTransformInfo.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//
17f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek
1899c6ad3f22b865d0f4cce52bc36904403c9ed4c4Ted Kremenek#include "AMDGPU.h"
195eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek#include "AMDGPUTargetMachine.h"
209b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "llvm/Analysis/LoopInfo.h"
219b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "llvm/Analysis/TargetTransformInfo.h"
229b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "llvm/Analysis/ValueTracking.h"
23f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek#include "llvm/Support/Debug.h"
24f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek#include "llvm/Target/CostTable.h"
25f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek#include "llvm/Target/TargetLowering.h"
260111f575b968e423dccae439e501225b8314b257Zhongxing Xuusing namespace llvm;
27ca804539d908d3a0e8c72a0df5f1f571d29490bbTed Kremenek
28ca804539d908d3a0e8c72a0df5f1f571d29490bbTed Kremenek#define DEBUG_TYPE "AMDGPUtti"
299ef6537a894c33003359b1f9b9676e9178e028b7Ted Kremenek
305a4f98ff943e6a501b0fe47ade007c9bbf96cb88Argyrios Kyrtzidis// Declare the pass initialization routine locally as target-specific passes
31a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks// don't have a target-wide initialization entry point, and so we rely on the
32a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks// pass constructor initialization.
3324f1a967741ff9f8025ee23be12ba6feacc31f77Ted Kremeneknamespace llvm {
34d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidisvoid initializeAMDGPUTTIPass(PassRegistry &);
35922059dec59c7bed235da01aff75ae522a369811Ted Kremenek}
36922059dec59c7bed235da01aff75ae522a369811Ted Kremenek
37d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidisnamespace {
38922059dec59c7bed235da01aff75ae522a369811Ted Kremenek
39922059dec59c7bed235da01aff75ae522a369811Ted Kremenekclass AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo {
40922059dec59c7bed235da01aff75ae522a369811Ted Kremenek  const AMDGPUTargetMachine *TM;
41922059dec59c7bed235da01aff75ae522a369811Ted Kremenek  const AMDGPUSubtarget *ST;
42d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis  const AMDGPUTargetLowering *TLI;
43ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks
44f05aac8472d8ed081a361a218fd14d59ddc91b85Anna Zaks  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
45ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  /// are set if the result needs to be inserted and/or extracted from vectors.
46d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
47d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis
48e36de1fe51c39d9161915dd3dbef880954af6476Ted Kremenekpublic:
49d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis  AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
50d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis    llvm_unreachable("This pass cannot be directly constructed");
510111f575b968e423dccae439e501225b8314b257Zhongxing Xu  }
52a7a8a450d908b34fa5f569f2e694ebd4b61aae2fTom Care
53a7a8a450d908b34fa5f569f2e694ebd4b61aae2fTom Care  AMDGPUTTI(const AMDGPUTargetMachine *TM)
5466750fa464ace9f8c41666c8585ec71a248c1ccaTed Kremenek      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
55422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek        TLI(TM->getTargetLowering()) {
56422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek    initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
57422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek  }
58422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek
59a7a8a450d908b34fa5f569f2e694ebd4b61aae2fTom Care  void initializePass() override { pushTTIStack(this); }
60a7a8a450d908b34fa5f569f2e694ebd4b61aae2fTom Care
61d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis  void getAnalysisUsage(AnalysisUsage &AU) const override {
620111f575b968e423dccae439e501225b8314b257Zhongxing Xu    TargetTransformInfo::getAnalysisUsage(AU);
63f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek  }
6438b02b912e1a55c912f603c4369431264d36a381Zhongxing Xu
651eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  /// Pass identification.
66f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek  static char ID;
67f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek
68f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek  /// Provide necessary pointer adjustments for the two base classes.
69d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis  void *getAdjustedAnalysisPointer(const void *ID) override {
701eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    if (ID == &TargetTransformInfo::ID)
71d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis      return (TargetTransformInfo *)this;
728e49dd6e7e73b275a74338a5127a524f0765303cTed Kremenek    return this;
738e49dd6e7e73b275a74338a5127a524f0765303cTed Kremenek  }
74d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis
75f598087b4adfea164acdd5b53ea2951bde740a2dTed Kremenek  bool hasBranchDivergence() const override;
76f598087b4adfea164acdd5b53ea2951bde740a2dTed Kremenek
77f598087b4adfea164acdd5b53ea2951bde740a2dTed Kremenek  void getUnrollingPreferences(Loop *L,
7866750fa464ace9f8c41666c8585ec71a248c1ccaTed Kremenek                               UnrollingPreferences &UP) const override;
79422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek
80422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek  /// @}
81422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek};
82422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek
831eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump} // end anonymous namespace
8418c66fdc3c4008d335885695fe36fb5353c5f672Ted Kremenek
8518c66fdc3c4008d335885695fe36fb5353c5f672Ted KremenekINITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti",
869c378f705405d37f49795d5e915989de774fe11fTed Kremenek                   "AMDGPU Target Transform Info", true, true, false)
871eb4433ac451dc16f4133a88af2d002ac26c58efMike Stumpchar AMDGPUTTI::ID = 0;
889c378f705405d37f49795d5e915989de774fe11fTed Kremenek
899c378f705405d37f49795d5e915989de774fe11fTed KremenekImmutablePass *
909c378f705405d37f49795d5e915989de774fe11fTed Kremenekllvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
919c378f705405d37f49795d5e915989de774fe11fTed Kremenek  return new AMDGPUTTI(TM);
921eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump}
939c378f705405d37f49795d5e915989de774fe11fTed Kremenek
949c378f705405d37f49795d5e915989de774fe11fTed Kremenekbool AMDGPUTTI::hasBranchDivergence() const { return true; }
95102acd5369bbb17c0d6ab868af376671acff7a93Douglas Gregor
96102acd5369bbb17c0d6ab868af376671acff7a93Douglas Gregorvoid AMDGPUTTI::getUnrollingPreferences(Loop *L,
97102acd5369bbb17c0d6ab868af376671acff7a93Douglas Gregor                                        UnrollingPreferences &UP) const {
980111f575b968e423dccae439e501225b8314b257Zhongxing Xu  for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
99f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek                                                  BI != BE; ++BI) {
100d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis    BasicBlock *BB = *BI;
101d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
1021eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump                                                      I != E; ++I) {
103dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks      const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
104dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks      if (!GEP)
105dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks        continue;
1064d2ae4a70336dc2aa11389b34946be152bb454c9Anna Zaks      const Value *Ptr = GEP->getPointerOperand();
1074d2ae4a70336dc2aa11389b34946be152bb454c9Anna Zaks      const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
108f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek      if (Alloca) {
109d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis        // We want to do whatever we can to limit the number of alloca
1100111f575b968e423dccae439e501225b8314b257Zhongxing Xu        // instructions that make it through to the code generator.  allocas
111d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis        // require us to use indirect addressing, which is slow and prone to
112d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis        // compiler bugs.  If this loop does an address calculation on an
11355825aa2d88fe82bf3622f195046ae48532d3106Ted Kremenek        // alloca ptr, then we want to use a higher than normal loop unroll
114f598087b4adfea164acdd5b53ea2951bde740a2dTed Kremenek        // threshold. This will give SROA a better chance to eliminate these
1150111f575b968e423dccae439e501225b8314b257Zhongxing Xu        // allocas.
116d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis        //
1170111f575b968e423dccae439e501225b8314b257Zhongxing Xu        // Don't use the maximum allowed value here as it will make some
118d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis        // programs way too big.
119d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis        UP.Threshold = 500;
120d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis      }
121f598087b4adfea164acdd5b53ea2951bde740a2dTed Kremenek    }
1220111f575b968e423dccae439e501225b8314b257Zhongxing Xu  }
123d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis}
1240111f575b968e423dccae439e501225b8314b257Zhongxing Xu