AMDGPUTargetTransformInfo.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
15d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis// 16d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis//===----------------------------------------------------------------------===// 17f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek 1899c6ad3f22b865d0f4cce52bc36904403c9ed4c4Ted Kremenek#include "AMDGPU.h" 195eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek#include "AMDGPUTargetMachine.h" 209b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "llvm/Analysis/LoopInfo.h" 219b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "llvm/Analysis/TargetTransformInfo.h" 229b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "llvm/Analysis/ValueTracking.h" 23f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek#include "llvm/Support/Debug.h" 24f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek#include "llvm/Target/CostTable.h" 25f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek#include "llvm/Target/TargetLowering.h" 260111f575b968e423dccae439e501225b8314b257Zhongxing Xuusing namespace llvm; 27ca804539d908d3a0e8c72a0df5f1f571d29490bbTed Kremenek 28ca804539d908d3a0e8c72a0df5f1f571d29490bbTed Kremenek#define DEBUG_TYPE "AMDGPUtti" 299ef6537a894c33003359b1f9b9676e9178e028b7Ted Kremenek 305a4f98ff943e6a501b0fe47ade007c9bbf96cb88Argyrios Kyrtzidis// Declare the pass initialization routine locally as target-specific passes 31a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks// don't have a target-wide initialization entry point, and so we rely on the 32a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks// pass constructor initialization. 
3324f1a967741ff9f8025ee23be12ba6feacc31f77Ted Kremeneknamespace llvm { 34d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidisvoid initializeAMDGPUTTIPass(PassRegistry &); 35922059dec59c7bed235da01aff75ae522a369811Ted Kremenek} 36922059dec59c7bed235da01aff75ae522a369811Ted Kremenek 37d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidisnamespace { 38922059dec59c7bed235da01aff75ae522a369811Ted Kremenek 39922059dec59c7bed235da01aff75ae522a369811Ted Kremenekclass AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo { 40922059dec59c7bed235da01aff75ae522a369811Ted Kremenek const AMDGPUTargetMachine *TM; 41922059dec59c7bed235da01aff75ae522a369811Ted Kremenek const AMDGPUSubtarget *ST; 42d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis const AMDGPUTargetLowering *TLI; 43ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 44f05aac8472d8ed081a361a218fd14d59ddc91b85Anna Zaks /// Estimate the overhead of scalarizing an instruction. Insert and Extract 45ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks /// are set if the result needs to be inserted and/or extracted from vectors. 
46d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 47d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis 48e36de1fe51c39d9161915dd3dbef880954af6476Ted Kremenekpublic: 49d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { 50d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis llvm_unreachable("This pass cannot be directly constructed"); 510111f575b968e423dccae439e501225b8314b257Zhongxing Xu } 52a7a8a450d908b34fa5f569f2e694ebd4b61aae2fTom Care 53a7a8a450d908b34fa5f569f2e694ebd4b61aae2fTom Care AMDGPUTTI(const AMDGPUTargetMachine *TM) 5466750fa464ace9f8c41666c8585ec71a248c1ccaTed Kremenek : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), 55422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek TLI(TM->getTargetLowering()) { 56422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry()); 57422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek } 58422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek 59a7a8a450d908b34fa5f569f2e694ebd4b61aae2fTom Care void initializePass() override { pushTTIStack(this); } 60a7a8a450d908b34fa5f569f2e694ebd4b61aae2fTom Care 61d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis void getAnalysisUsage(AnalysisUsage &AU) const override { 620111f575b968e423dccae439e501225b8314b257Zhongxing Xu TargetTransformInfo::getAnalysisUsage(AU); 63f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek } 6438b02b912e1a55c912f603c4369431264d36a381Zhongxing Xu 651eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump /// Pass identification. 66f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek static char ID; 67f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek 68f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek /// Provide necessary pointer adjustments for the two base classes. 
69d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis void *getAdjustedAnalysisPointer(const void *ID) override { 701eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump if (ID == &TargetTransformInfo::ID) 71d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis return (TargetTransformInfo *)this; 728e49dd6e7e73b275a74338a5127a524f0765303cTed Kremenek return this; 738e49dd6e7e73b275a74338a5127a524f0765303cTed Kremenek } 74d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis 75f598087b4adfea164acdd5b53ea2951bde740a2dTed Kremenek bool hasBranchDivergence() const override; 76f598087b4adfea164acdd5b53ea2951bde740a2dTed Kremenek 77f598087b4adfea164acdd5b53ea2951bde740a2dTed Kremenek void getUnrollingPreferences(Loop *L, 7866750fa464ace9f8c41666c8585ec71a248c1ccaTed Kremenek UnrollingPreferences &UP) const override; 79422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek 80422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek /// @} 81422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek}; 82422ab7a49a9a4252dbc6350e49d7a5708337b9c7Ted Kremenek 831eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump} // end anonymous namespace 8418c66fdc3c4008d335885695fe36fb5353c5f672Ted Kremenek 8518c66fdc3c4008d335885695fe36fb5353c5f672Ted KremenekINITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti", 869c378f705405d37f49795d5e915989de774fe11fTed Kremenek "AMDGPU Target Transform Info", true, true, false) 871eb4433ac451dc16f4133a88af2d002ac26c58efMike Stumpchar AMDGPUTTI::ID = 0; 889c378f705405d37f49795d5e915989de774fe11fTed Kremenek 899c378f705405d37f49795d5e915989de774fe11fTed KremenekImmutablePass * 909c378f705405d37f49795d5e915989de774fe11fTed Kremenekllvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) { 919c378f705405d37f49795d5e915989de774fe11fTed Kremenek return new AMDGPUTTI(TM); 921eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump} 939c378f705405d37f49795d5e915989de774fe11fTed Kremenek 949c378f705405d37f49795d5e915989de774fe11fTed 
Kremenekbool AMDGPUTTI::hasBranchDivergence() const { return true; } 95102acd5369bbb17c0d6ab868af376671acff7a93Douglas Gregor 96102acd5369bbb17c0d6ab868af376671acff7a93Douglas Gregorvoid AMDGPUTTI::getUnrollingPreferences(Loop *L, 97102acd5369bbb17c0d6ab868af376671acff7a93Douglas Gregor UnrollingPreferences &UP) const { 980111f575b968e423dccae439e501225b8314b257Zhongxing Xu for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); 99f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek BI != BE; ++BI) { 100d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis BasicBlock *BB = *BI; 101d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); 1021eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump I != E; ++I) { 103dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I); 104dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks if (!GEP) 105dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks continue; 1064d2ae4a70336dc2aa11389b34946be152bb454c9Anna Zaks const Value *Ptr = GEP->getPointerOperand(); 1074d2ae4a70336dc2aa11389b34946be152bb454c9Anna Zaks const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr)); 108f24af5bc2e01ca8e7396ed997378a77fddfa521eTed Kremenek if (Alloca) { 109d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis // We want to do whatever we can to limit the number of alloca 1100111f575b968e423dccae439e501225b8314b257Zhongxing Xu // instructions that make it through to the code generator. allocas 111d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis // require us to use indirect addressing, which is slow and prone to 112d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis // compiler bugs. 
If this loop does an address calculation on an 11355825aa2d88fe82bf3622f195046ae48532d3106Ted Kremenek // alloca ptr, then we want to use a higher than normal loop unroll 114f598087b4adfea164acdd5b53ea2951bde740a2dTed Kremenek // threshold. This will give SROA a better chance to eliminate these 1150111f575b968e423dccae439e501225b8314b257Zhongxing Xu // allocas. 116d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis // 1170111f575b968e423dccae439e501225b8314b257Zhongxing Xu // Don't use the maximum allowed value here as it will make some 118d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis // programs way too big. 119d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis UP.Threshold = 500; 120d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis } 121f598087b4adfea164acdd5b53ea2951bde740a2dTed Kremenek } 1220111f575b968e423dccae439e501225b8314b257Zhongxing Xu } 123d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis} 1240111f575b968e423dccae439e501225b8314b257Zhongxing Xu