Inliner.cpp revision fd7079292b43ca5eceae2c585fc2899033a448f7
//===- Inliner.cpp - Code common to all inliners --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the mechanics required to implement inlining without
// missing any calls and updating the call graph.  The decisions of which calls
// are profitable to inline are implemented elsewhere.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "inline"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/InlineCost.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include <set>
using namespace llvm;

// Pass statistics, reported with -stats.
STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");

// -inline-threshold: the default cost ceiling used by shouldInline() below.
static cl::opt<int>
InlineLimit("inline-threshold", cl::Hidden, cl::init(200), cl::ZeroOrMore,
        cl::desc("Control the amount of inlining to perform (default = 200)"));

// Default constructor: take the inline threshold from the -inline-threshold
// command-line option.
Inliner::Inliner(void *ID)
  : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}

// Explicit-threshold constructor: the caller supplies the threshold directly,
// bypassing the command-line option.
Inliner::Inliner(void *ID, int Threshold)
  : CallGraphSCCPass(ID), InlineThreshold(Threshold) {}

/// getAnalysisUsage - For this class, we declare that we require and preserve
/// the call graph.  If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
  CallGraphSCCPass::getAnalysisUsage(Info);
}


// Maps an array type to the list of allocas of that type which were created
// by inlining and are available for reuse by later inlines into the same
// caller (see the merging logic in InlineCallIfPossible below).
typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> >
InlinedArrayAllocasTy;

/// InlineCallIfPossible - If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR.  The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller.  If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
///
/// Returns true if the call site was inlined (the IR and call graph have been
/// updated), false if inlining was not possible.
static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
                                 const TargetData *TD,
                                 InlinedArrayAllocasTy &InlinedArrayAllocas) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // Try to inline the function.  Get the list of static allocas that were
  // inlined.
  SmallVector<AllocaInst*, 16> StaticAllocas;
  if (!InlineFunction(CS, &CG, TD, &StaticAllocas))
    return false;

  // If the inlined function had a higher stack protection level than the
  // calling function, then bump up the caller's stack protection level.
  if (Callee->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtectReq);
  else if (Callee->hasFnAttr(Attribute::StackProtect) &&
           !Caller->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtect);


  // Look at all of the allocas that we inlined through this call site.  If we
  // have already inlined other allocas through other calls into this function,
  // then we know that they have disjoint lifetimes and that we can merge them.
  //
  // There are many heuristics possible for merging these allocas, and the
  // different options have different tradeoffs.  One thing that we *really*
  // don't want to hurt is SRoA: once inlining happens, often allocas are no
  // longer address taken and so they can be promoted.
  //
  // Our "solution" for that is to only merge allocas whose outermost type is an
  // array type.  These are usually not promoted because someone is using a
  // variable index into them.  These are also often the most important ones to
  // merge.
  //
  // A better solution would be to have real memory lifetime markers in the IR
  // and not have the inliner do any merging of allocas at all.  This would
  // allow the backend to do proper stack slot coloring of all allocas that
  // *actually make it to the backend*, which is really what we want.
  //
  // Because we don't have this information, we do this simple and useful hack.
  //
  // UsedAllocas tracks which "available" allocas have already been claimed by
  // an alloca from *this* inline operation, so two allocas inlined through the
  // same call site (which have overlapping lifetimes) never share a slot.
  SmallPtrSet<AllocaInst*, 16> UsedAllocas;

  // Loop over all the allocas we have so far and see if they can be merged with
  // a previously inlined alloca.  If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = StaticAllocas.size();
       AllocaNo != e; ++AllocaNo) {
    AllocaInst *AI = StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (ATy == 0 || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function.  Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
      AllocaInst *AvailableAlloca = AllocasForType[i];

      // The available alloca has to be in the right function, not in some other
      // function in this SCC.
      // NOTE(review): this compares the parent basic blocks; presumably both
      // are static allocas in their function's entry block, so comparing
      // parent blocks is equivalent to comparing functions — confirm.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.  (insert returns false when the element was already present.)
      if (!UsedAllocas.insert(AvailableAlloca))
        continue;

      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
      // success!
      DEBUG(errs() << "    ***MERGED ALLOCA: " << *AI);

      AI->replaceAllUsesWith(AvailableAlloca);
      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and mark
    // it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }

  return true;
}

/// shouldInline - Return true if the inliner should attempt to inline
/// at the given CallSite.
/// shouldInline - Consult the cost model and decide whether the given call
/// site is profitable to inline.  Returns true when the call should be
/// inlined: either the cost model says "always", or the computed cost is
/// below the (possibly size-reduced) threshold scaled by the fudge factor.
bool Inliner::shouldInline(CallSite CS) {
  InlineCost IC = getInlineCost(CS);

  // Unconditional inline (e.g. forced by the cost model) — no threshold check.
  if (IC.isAlways()) {
    DEBUG(errs() << "    Inlining: cost=always"
          << ", Call: " << *CS.getInstruction() << "\n");
    return true;
  }

  // Unconditional refusal from the cost model.
  if (IC.isNever()) {
    DEBUG(errs() << "    NOT Inlining: cost=never"
          << ", Call: " << *CS.getInstruction() << "\n");
    return false;
  }

  int Cost = IC.getValue();
  int CurrentThreshold = InlineThreshold;
  Function *Fn = CS.getCaller();
  // Callers marked optsize get a tighter threshold of 50.
  // NOTE(review): the 'InlineThreshold != 50' guard presumably means "don't
  // override a threshold the user explicitly set to 50" — confirm intent.
  if (Fn && !Fn->isDeclaration() &&
      Fn->hasFnAttr(Attribute::OptimizeForSize) &&
      InlineThreshold != 50)
    CurrentThreshold = 50;

  // The fudge factor scales the threshold up or down per call site.
  float FudgeFactor = getInlineFudgeFactor(CS);
  if (Cost >= (int)(CurrentThreshold * FudgeFactor)) {
    DEBUG(errs() << "    NOT Inlining: cost=" << Cost
          << ", Call: " << *CS.getInstruction() << "\n");
    return false;
  }

  DEBUG(errs() << "    Inlining: cost=" << Cost
          << ", Call: " << *CS.getInstruction() << "\n");
  return true;
}

/// runOnSCC - Collect all inlinable call sites in the functions of this SCC,
/// then repeatedly inline the profitable ones until no more progress is made.
/// Also deletes callee bodies that become dead as a result of inlining.
/// Returns true if any change was made to the module.
bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
  CallGraph &CG = getAnalysis<CallGraph>();
  const TargetData *TD = getAnalysisIfAvailable<TargetData>();

  // Remember which functions belong to this SCC so that (a) we can order the
  // worklist and (b) we never delete a function whose node the SCC iterator
  // still references.
  SmallPtrSet<Function*, 8> SCCFunctions;
  DEBUG(errs() << "Inliner visiting SCC:");
  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
    Function *F = SCC[i]->getFunction();
    if (F) SCCFunctions.insert(F);
    DEBUG(errs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<CallSite, 16> CallSites;

  for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
    Function *F = SCC[i]->getFunction();
    if (!F) continue;

    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
        CallSite CS = CallSite::get(I);
        // Skip non-call instructions and debug intrinsics.
        if (CS.getInstruction() == 0 || isa<DbgInfoIntrinsic>(I))
          continue;

        // Keep indirect calls (callee == 0, may become direct after inlining)
        // and direct calls to functions with bodies; calls to declarations
        // can never be inlined.
        if (CS.getCalledFunction() == 0 ||
            !CS.getCalledFunction()->isDeclaration())
          CallSites.push_back(CS);
      }
  }

  DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");

  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);


  InlinedArrayAllocasTy InlinedArrayAllocas;

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // The outer do-while re-scans the worklist because inlining functions can
    // cause indirect calls to become direct calls.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      // We can only inline direct calls.
      CallSite CS = CallSites[CSi];

      Function *Callee = CS.getCalledFunction();
      if (!Callee) continue;

      // Calls to external functions are never inlinable; drop them from the
      // worklist.
      if (Callee->isDeclaration()) {
        if (SCC.size() == 1) {
          // Single-function SCC: no barrier to maintain, cheap swap/pop works.
          std::swap(CallSites[CSi], CallSites.back());
          CallSites.pop_back();
        } else {
          // Keep the 'in SCC / not in SCC' boundary correct.
          CallSites.erase(CallSites.begin()+CSi);
        }
        --CSi;
        continue;
      }

      // If the policy determines that we should inline this function,
      // try to do so.
      if (!shouldInline(CS))
        continue;

      Function *Caller = CS.getCaller();
      // Attempt to inline the function...
      if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
        continue;

      // If we inlined the last possible call site to the function, delete the
      // function body now.
      if (Callee->use_empty() && Callee->hasLocalLinkage() &&
          !SCCFunctions.count(Callee) &&
          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet, this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        DEBUG(errs() << "    -> Deleting dead function: "
              << Callee->getName() << "\n");
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        // Drop cached cost info before the Function object goes away.
        resetCachedCostInfo(Callee);

        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove any cached cost info for this caller, as inlining the
      // callee has increased the size of the caller (which may be the
      // same as the callee).
      resetCachedCostInfo(Caller);

      // Remove this call site from the list.  If possible, use
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.size() == 1) {
        std::swap(CallSites[CSi], CallSites.back());
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin()+CSi);
      }
      --CSi;

      ++NumInlined;
      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}

// doFinalization - Remove now-dead linkonce functions at the end of
// processing to avoid breaking the SCC traversal.
bool Inliner::doFinalization(CallGraph &CG) {
  return removeDeadFunctions(CG);
}

/// removeDeadFunctions - Remove dead functions that are not included in
/// DNR (Do Not Remove) list.  A function is considered dead when it has no
/// remaining users and has linkonce, local, or available_externally linkage.
/// Returns true if any function was deleted.
bool Inliner::removeDeadFunctions(CallGraph &CG,
                                  SmallPtrSet<const Function *, 16> *DNR) {
  SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;

  // Scan for all of the functions, looking for ones that should now be removed
  // from the program.  Insert the dead ones in the FunctionsToRemove set.
  for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
    CallGraphNode *CGN = I->second;
    // Skip the external/indirect nodes that have no Function.
    if (CGN->getFunction() == 0)
      continue;

    Function *F = CGN->getFunction();

    // If the only remaining users of the function are dead constants, remove
    // them.
    F->removeDeadConstantUsers();

    // Honor the caller-supplied Do-Not-Remove list.
    if (DNR && DNR->count(F))
      continue;
    // Only functions with these linkages are safe to drop when unused.
    if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
        !F->hasAvailableExternallyLinkage())
      continue;
    if (!F->use_empty())
      continue;

    // Remove any call graph edges from the function to its callees.
    CGN->removeAllCalledFunctions();

    // Remove any edges from the external node to the function's call graph
    // node.  These edges might have been made irrelevant due to
    // optimization of the program.
    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);

    // Queue the node for removal; the actual deletion is deferred below so we
    // don't invalidate the CallGraph iterator we are using right now.
    FunctionsToRemove.insert(CGN);
  }

  // Now that we know which functions to delete, do so.  We didn't want to do
  // this inline, because that would invalidate our CallGraph::iterator
  // objects. :(
  //
  // Note that it doesn't matter that we are iterating over a non-stable set
  // here to do this, it doesn't matter which order the functions are deleted
  // in.
  bool Changed = false;
  for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
       E = FunctionsToRemove.end(); I != E; ++I) {
    // Drop cached cost info before the Function is destroyed.
    resetCachedCostInfo((*I)->getFunction());
    delete CG.removeFunctionFromModule(*I);
    ++NumDeleted;
    Changed = true;
  }

  return Changed;
}