//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass tries to partially inline the fast path of well-known library
// functions, such as using square-root instructions for cases where sqrt()
// does not need to set errno.
//
//===----------------------------------------------------------------------===//
1545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
16a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford#include "llvm/Analysis/TargetTransformInfo.h"
1745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/IR/IRBuilder.h"
1845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/IR/Intrinsics.h"
1945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/Pass.h"
2045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/Support/CommandLine.h"
2145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/Target/TargetLibraryInfo.h"
22a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford#include "llvm/Transforms/Scalar.h"
2345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka#include "llvm/Transforms/Utils/BasicBlockUtils.h"
2445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
2545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanakausing namespace llvm;
2645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
27dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "partially-inline-libcalls"
28dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
2945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanakanamespace {
30a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford  class PartiallyInlineLibCalls : public FunctionPass {
3145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  public:
3245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka    static char ID;
3345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
34a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford    PartiallyInlineLibCalls() :
35a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford      FunctionPass(ID) {
36a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford      initializePartiallyInlineLibCallsPass(*PassRegistry::getPassRegistry());
3745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka    }
3845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
3936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    void getAnalysisUsage(AnalysisUsage &AU) const override;
4036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    bool runOnFunction(Function &F) override;
4145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
4245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  private:
4345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka    /// Optimize calls to sqrt.
4445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka    bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
45a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford                      BasicBlock &CurrBB, Function::iterator &BB);
4645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  };
4745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
48a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford  char PartiallyInlineLibCalls::ID = 0;
4945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka}
5045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
51a8a7099c1849fcbb4a68642a292fd0250aa46505Richard SandifordINITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
52a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford                "Partially inline calls to library functions", false, false)
5345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
54a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandifordvoid PartiallyInlineLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
5545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  AU.addRequired<TargetLibraryInfo>();
56a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford  AU.addRequired<TargetTransformInfo>();
5745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  FunctionPass::getAnalysisUsage(AU);
5845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka}
5945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
60a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandifordbool PartiallyInlineLibCalls::runOnFunction(Function &F) {
6145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  bool Changed = false;
6245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Function::iterator CurrBB;
63a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford  TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
64a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford  const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfo>();
6545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
6645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka    CurrBB = BB++;
6745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
6845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka    for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
6945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka         II != IE; ++II) {
7045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      CallInst *Call = dyn_cast<CallInst>(&*II);
7145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      Function *CalledFunc;
7245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
7345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      if (!Call || !(CalledFunc = Call->getCalledFunction()))
7445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka        continue;
7545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
7645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      // Skip if function either has local linkage or is not a known library
7745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      // function.
78a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford      LibFunc::Func LibFunc;
7945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
80a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford          !TLI->getLibFunc(CalledFunc->getName(), LibFunc))
8145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka        continue;
8245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
8345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      switch (LibFunc) {
8445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      case LibFunc::sqrtf:
8545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      case LibFunc::sqrt:
86a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford        if (TTI->haveFastSqrt(Call->getType()) &&
87a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford            optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
8845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka          break;
8945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka        continue;
9045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      default:
9145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka        continue;
9245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      }
9345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
9445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      Changed = true;
9545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka      break;
9645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka    }
9745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  }
9845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
9945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  return Changed;
10045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka}
10145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
102a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandifordbool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
103a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford                                           Function *CalledFunc,
104a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford                                           BasicBlock &CurrBB,
105a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford                                           Function::iterator &BB) {
10645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // There is no need to change the IR, since backend will emit sqrt
10745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // instruction if the call has already been marked read-only.
10845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  if (Call->onlyReadsMemory())
10945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka    return false;
11045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
11145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // Do the following transformation:
11245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  //
11345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // (before)
11445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // dst = sqrt(src)
11545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  //
11645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // (after)
11745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // v0 = sqrt_noreadmem(src) # native sqrt instruction.
11845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // if (v0 is a NaN)
11945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  //   v1 = sqrt(src)         # library call.
12045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // dst = phi(v0, v1)
12145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  //
12245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
12345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // Move all instructions following Call to newly created block JoinBB.
12445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // Create phi and replace all uses.
12545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode(), this);
12645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  IRBuilder<> Builder(JoinBB, JoinBB->begin());
12745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  PHINode *Phi = Builder.CreatePHI(Call->getType(), 2);
12845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Call->replaceAllUsesWith(Phi);
12945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
13045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // Create basic block LibCallBB and insert a call to library function sqrt.
13145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  BasicBlock *LibCallBB = BasicBlock::Create(CurrBB.getContext(), "call.sqrt",
13245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka                                             CurrBB.getParent(), JoinBB);
13345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Builder.SetInsertPoint(LibCallBB);
13445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Instruction *LibCall = Call->clone();
13545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Builder.Insert(LibCall);
13645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Builder.CreateBr(JoinBB);
13745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
13845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // Add attribute "readnone" so that backend can use a native sqrt instruction
13945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // for this call. Insert a FP compare instruction and a conditional branch
14045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // at the end of CurrBB.
14145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
14245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  CurrBB.getTerminator()->eraseFromParent();
14345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Builder.SetInsertPoint(&CurrBB);
14445137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Value *FCmp = Builder.CreateFCmpOEQ(Call, Call);
14545137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Builder.CreateCondBr(FCmp, JoinBB, LibCallBB);
14645137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
14745137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  // Add phi operands.
14845137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Phi->addIncoming(Call, &CurrBB);
14945137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  Phi->addIncoming(LibCall, LibCallBB);
15045137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka
15145137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  BB = JoinBB;
15245137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka  return true;
15345137f954f976ea75282f7b4b2dac5777837840fAkira Hatanaka}
154a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford
155a8a7099c1849fcbb4a68642a292fd0250aa46505Richard SandifordFunctionPass *llvm::createPartiallyInlineLibCallsPass() {
156a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford  return new PartiallyInlineLibCalls();
157a8a7099c1849fcbb4a68642a292fd0250aa46505Richard Sandiford}
158