//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass tries to partially inline the fast path of well-known library
// functions, such as using square-root instructions for cases where sqrt()
// does not need to set errno.
1345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka// 1445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka//===----------------------------------------------------------------------===// 1545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 16a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford#include "llvm/Analysis/TargetTransformInfo.h" 1745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/IR/IRBuilder.h" 1845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/IR/Intrinsics.h" 1945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/Pass.h" 2045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/Support/CommandLine.h" 2145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/Target/TargetLibraryInfo.h" 22a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford#include "llvm/Transforms/Scalar.h" 2345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/Transforms/Utils/BasicBlockUtils.h" 2445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 2545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanakausing namespace llvm; 2645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 27dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "partially-inline-libcalls" 28dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 2945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanakanamespace { 30a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford class PartiallyInlineLibCalls : public FunctionPass { 3145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka public: 3245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka static char ID; 3345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 34a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford PartiallyInlineLibCalls() : 35a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford FunctionPass(ID) { 36a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford 
initializePartiallyInlineLibCallsPass(*PassRegistry::getPassRegistry()); 3745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka } 3845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 3936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void getAnalysisUsage(AnalysisUsage &AU) const override; 4036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool runOnFunction(Function &F) override; 4145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 4245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka private: 4345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka /// Optimize calls to sqrt. 4445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka bool optimizeSQRT(CallInst *Call, Function *CalledFunc, 45a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford BasicBlock &CurrBB, Function::iterator &BB); 4645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka }; 4745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 48a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford char PartiallyInlineLibCalls::ID = 0; 4945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka} 5045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 51a8a7099c1849fcbb4a68642a292fd0250aa46505Richard SandifordINITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls", 52a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford "Partially inline calls to library functions", false, false) 5345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 54a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandifordvoid PartiallyInlineLibCalls::getAnalysisUsage(AnalysisUsage &AU) const { 5545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka AU.addRequired<TargetLibraryInfo>(); 56a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford AU.addRequired<TargetTransformInfo>(); 5745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka FunctionPass::getAnalysisUsage(AU); 5845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka} 5945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 
60a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandifordbool PartiallyInlineLibCalls::runOnFunction(Function &F) { 6145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka bool Changed = false; 6245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Function::iterator CurrBB; 63a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); 64a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfo>(); 6545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) { 6645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka CurrBB = BB++; 6745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 6845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end(); 6945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka II != IE; ++II) { 7045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka CallInst *Call = dyn_cast<CallInst>(&*II); 7145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Function *CalledFunc; 7245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 7345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka if (!Call || !(CalledFunc = Call->getCalledFunction())) 7445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka continue; 7545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 7645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // Skip if function either has local linkage or is not a known library 7745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // function. 
78a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford LibFunc::Func LibFunc; 7945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() || 80a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford !TLI->getLibFunc(CalledFunc->getName(), LibFunc)) 8145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka continue; 8245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 8345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka switch (LibFunc) { 8445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka case LibFunc::sqrtf: 8545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka case LibFunc::sqrt: 86a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford if (TTI->haveFastSqrt(Call->getType()) && 87a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford optimizeSQRT(Call, CalledFunc, *CurrBB, BB)) 8845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka break; 8945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka continue; 9045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka default: 9145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka continue; 9245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka } 9345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 9445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Changed = true; 9545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka break; 9645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka } 9745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka } 9845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 9945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka return Changed; 10045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka} 10145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 102a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandifordbool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call, 103a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford Function *CalledFunc, 
104a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford BasicBlock &CurrBB, 105a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford Function::iterator &BB) { 10645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // There is no need to change the IR, since backend will emit sqrt 10745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // instruction if the call has already been marked read-only. 10845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka if (Call->onlyReadsMemory()) 10945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka return false; 11045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 11145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // Do the following transformation: 11245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // 11345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // (before) 11445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // dst = sqrt(src) 11545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // 11645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // (after) 11745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // v0 = sqrt_noreadmem(src) # native sqrt instruction. 11845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // if (v0 is a NaN) 11945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // v1 = sqrt(src) # library call. 12045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // dst = phi(v0, v1) 12145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // 12245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 12345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // Move all instructions following Call to newly created block JoinBB. 12445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // Create phi and replace all uses. 
12545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode(), this); 12645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka IRBuilder<> Builder(JoinBB, JoinBB->begin()); 12745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka PHINode *Phi = Builder.CreatePHI(Call->getType(), 2); 12845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Call->replaceAllUsesWith(Phi); 12945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 13045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // Create basic block LibCallBB and insert a call to library function sqrt. 13145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka BasicBlock *LibCallBB = BasicBlock::Create(CurrBB.getContext(), "call.sqrt", 13245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka CurrBB.getParent(), JoinBB); 13345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Builder.SetInsertPoint(LibCallBB); 13445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Instruction *LibCall = Call->clone(); 13545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Builder.Insert(LibCall); 13645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Builder.CreateBr(JoinBB); 13745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 13845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // Add attribute "readnone" so that backend can use a native sqrt instruction 13945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // for this call. Insert a FP compare instruction and a conditional branch 14045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // at the end of CurrBB. 
14145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); 14245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka CurrBB.getTerminator()->eraseFromParent(); 14345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Builder.SetInsertPoint(&CurrBB); 14445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Value *FCmp = Builder.CreateFCmpOEQ(Call, Call); 14545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Builder.CreateCondBr(FCmp, JoinBB, LibCallBB); 14645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 14745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka // Add phi operands. 14845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Phi->addIncoming(Call, &CurrBB); 14945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka Phi->addIncoming(LibCall, LibCallBB); 15045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka 15145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka BB = JoinBB; 15245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka return true; 15345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka} 154a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford 155a8a7099c1849fcbb4a68642a292fd0250aa46505Richard SandifordFunctionPass *llvm::createPartiallyInlineLibCallsPass() { 156a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford return new PartiallyInlineLibCalls(); 157a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford} 158