//===-- NVPTXFavorNonGenericAddrSpace.cpp - ---------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// When a load/store accesses the generic address space, this pass checks
// whether the address is cast from a non-generic address space. If so, it
// removes this addrspacecast because accessing non-generic address spaces is
// typically faster. Besides seeking addrspacecasts, this optimization also
// traces into the base pointer of a GEP.
//
// For instance, the code below loads a float from an array allocated in
// addrspace(3).
//
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float]* %0, i64 0, i64 %i
//   %2 = load float* %1 ; emits ld.f32
//
// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast
// and the GEP to expose more optimization opportunities to function
// optimizeMemoryInstruction. The intermediate code looks like:
//
//   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %1 = addrspacecast float addrspace(3)* %0 to float*
//   %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly
//
// Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed
// generic pointers, and folds the load and the addrspacecast into a load from
// the original address space. The final code looks like:
//
//   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %2 = load float addrspace(3)* %0 ; emits ld.shared.f32
//
// This pass may remove an addrspacecast in a different BB. Therefore, we
// implement it as a FunctionPass.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

// An option to disable this optimization. The optimization is enabled by
// default.
52dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesstatic cl::opt<bool> DisableFavorNonGeneric( 53dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines "disable-nvptx-favor-non-generic", 54dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines cl::init(false), 55dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines cl::desc("Do not convert generic address space usage " 56dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines "to non-generic address space usage"), 57dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines cl::Hidden); 58dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 59dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesnamespace { 60dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// \brief NVPTXFavorNonGenericAddrSpaces 61dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesclass NVPTXFavorNonGenericAddrSpaces : public FunctionPass { 62dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinespublic: 63dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines static char ID; 64dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {} 65dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 66dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool runOnFunction(Function &F) override; 67dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 68dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// Optimizes load/store instructions. Idx is the index of the pointer operand 69dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// (0 for load, and 1 for store). Returns true if it changes anything. 70dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool optimizeMemoryInstruction(Instruction *I, unsigned Idx); 71dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X, 72dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// indices)". 
This reordering exposes to optimizeMemoryInstruction more 73dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// optimization opportunities on loads and stores. Returns true if it changes 74dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// the program. 75dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP); 76dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}; 77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 78dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 79dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hineschar NVPTXFavorNonGenericAddrSpaces::ID = 0; 80dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 81dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesnamespace llvm { 82dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesvoid initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); 83dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 84dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesINITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic", 85dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines "Remove unnecessary non-generic-to-generic addrspacecasts", 86dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines false, false) 87dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 88dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// Decides whether removing Cast is valid and beneficial. Cast can be an 89dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// instruction or a constant expression. 90dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesstatic bool IsEliminableAddrSpaceCast(Operator *Cast) { 91dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Returns false if not even an addrspacecast. 
92dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Cast->getOpcode() != Instruction::AddrSpaceCast) 93dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return false; 94dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 95dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Value *Src = Cast->getOperand(0); 96dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines PointerType *SrcTy = cast<PointerType>(Src->getType()); 97dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines PointerType *DestTy = cast<PointerType>(Cast->getType()); 98dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // TODO: For now, we only handle the case where the addrspacecast only changes 99dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // the address space but not the type. If the type also changes, we could 100dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // still get rid of the addrspacecast by adding an extra bitcast, but we 101dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // rarely see such scenarios. 102dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (SrcTy->getElementType() != DestTy->getElementType()) 103dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return false; 104dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 105dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Checks whether the addrspacecast is from a non-generic address space to the 106dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // generic address space. 
107dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC && 108dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC); 109dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 110dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 111dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( 112dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines GEPOperator *GEP) { 113dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand()); 114dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!Cast) 115dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return false; 116dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 117dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!IsEliminableAddrSpaceCast(Cast)) 118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return false; 119dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 120dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); 121dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) { 122dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // %1 = gep (addrspacecast X), indices 123dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // => 124dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // %0 = gep X, indices 125dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // %1 = addrspacecast %0 126dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0), 127dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Indices, 128dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines GEP->getName(), 129dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines GEPI); 
130dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines NewGEPI->setIsInBounds(GEP->isInBounds()); 131dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines GEP->replaceAllUsesWith( 132dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI)); 133dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } else { 134dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // GEP is a constant expression. 135dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Constant *NewGEPCE = ConstantExpr::getGetElementPtr( 136dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines cast<Constant>(Cast->getOperand(0)), 137dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Indices, 138dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines GEP->isInBounds()); 139dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines GEP->replaceAllUsesWith( 140dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); 141dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 142dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 143dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return true; 144dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 145dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 146dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, 147dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned Idx) { 148dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // If the pointer operand is a GEP, hoist the addrspacecast if any from the 149dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // GEP to expose more optimization opportunites. 
150dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) { 151dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines hoistAddrSpaceCastFromGEP(GEP); 152dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 153dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 154dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // load/store (addrspacecast X) => load/store X if shortcutting the 155dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // addrspacecast is valid and can improve performance. 156dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // 157dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // e.g., 158dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // %1 = addrspacecast float addrspace(3)* %0 to float* 159dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // %2 = load float* %1 160dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // -> 161dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // %2 = load float addrspace(3)* %0 162dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // 163dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Note: the addrspacecast can also be a constant expression. 
164dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) { 165dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (IsEliminableAddrSpaceCast(Cast)) { 166dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MI->setOperand(Idx, Cast->getOperand(0)); 167dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return true; 168dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 169dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 170dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 171dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return false; 172dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 173dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 174dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { 175dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (DisableFavorNonGeneric) 176dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return false; 177dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 178dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool Changed = false; 179dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { 180dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) { 181dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (isa<LoadInst>(I)) { 182dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // V = load P 183dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Changed |= optimizeMemoryInstruction(I, 0); 184dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } else if (isa<StoreInst>(I)) { 185dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // store V, P 186dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Changed |= optimizeMemoryInstruction(I, 1); 187dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 
188dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 189dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 190dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return Changed; 191dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 192dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 193dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesFunctionPass *llvm::createNVPTXFavorNonGenericAddrSpacesPass() { 194dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return new NVPTXFavorNonGenericAddrSpaces(); 195dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 196