GlobalOpt.cpp revision bc384a1feb08141691ef994a7d11a506e89c5a62
//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken.  Where this is obviously safe, it marks read/write globals as
// constant, deletes variables that are only stored to, etc.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "globalopt"
#include "llvm/Transforms/IPO.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NumMarked    , "Number of globals marked constant");
STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute, "Number of globals with initializers stored into them");
STATISTIC(NumDeleted   , "Number of globals deleted");
STATISTIC(NumFnDeleted , "Number of functions deleted");
STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved , "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");

namespace {
  struct GlobalStatus;
  struct GlobalOpt : public ModulePass {
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<TargetLibraryInfo>();
    }
    static char ID; // Pass identification, replacement for typeid
    GlobalOpt() : ModulePass(ID) {
      initializeGlobalOptPass(*PassRegistry::getPassRegistry());
    }

    bool runOnModule(Module &M);

  private:
    GlobalVariable *FindGlobalCtors(Module &M);
    bool OptimizeFunctions(Module &M);
    bool OptimizeGlobalVars(Module &M);
    bool OptimizeGlobalAliases(Module &M);
    bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
    bool ProcessGlobal(GlobalVariable *GV, Module::global_iterator &GVI);
    bool ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI,
                               const SmallPtrSet<const PHINode*, 16> &PHIUsers,
                               const GlobalStatus &GS);
    bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
  };
}

char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
                      "Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
                    "Global Variable Optimizer", false, false)

ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }

namespace {

/// GlobalStatus - As we analyze each global, keep track of some information
/// about it.  If we find out that the address of the global is taken, none of
/// this info will be accurate.
struct GlobalStatus {
  /// isCompared - True if the global's address is used in a comparison.
  bool isCompared;

  /// isLoaded - True if the global is ever loaded.  If the global isn't ever
  /// loaded it can be deleted.
  bool isLoaded;

  /// StoredType - Keep track of what stores to the global look like.
  ///
  enum StoredType {
    /// NotStored - There is no store to this global.  It can thus be marked
    /// constant.
    NotStored,

    /// isInitializerStored - This global is stored to, but the only thing
    /// stored is the constant it was initialized with.  This is only tracked
    /// for scalar globals.
    isInitializerStored,

    /// isStoredOnce - This global is stored to, but only its initializer and
    /// one other value is ever stored to it.  If this global isStoredOnce, we
    /// track the value stored to it in StoredOnceValue below.  This is only
    /// tracked for scalar globals.
    isStoredOnce,

    /// isStored - This global is stored to by multiple values or something else
    /// that we cannot track.
    isStored
  } StoredType;

  /// StoredOnceValue - If only one value (besides the initializer constant) is
  /// ever stored to this global, keep track of what value it is.
  Value *StoredOnceValue;

  /// AccessingFunction/HasMultipleAccessingFunctions - These start out
  /// null/false.  When the first accessing function is noticed, it is recorded.
  /// When a second different accessing function is noticed,
  /// HasMultipleAccessingFunctions is set to true.
  const Function *AccessingFunction;
  bool HasMultipleAccessingFunctions;

  /// HasNonInstructionUser - Set to true if this global has a user that is not
  /// an instruction (e.g. a constant expr or GV initializer).
  bool HasNonInstructionUser;

  /// HasPHIUser - Set to true if this global has a user that is a PHI node.
  bool HasPHIUser;

  GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
                   StoredOnceValue(0), AccessingFunction(0),
                   HasMultipleAccessingFunctions(false),
                   HasNonInstructionUser(false), HasPHIUser(false) {}
};

}

/// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
/// by constants itself.  Note that constants cannot be cyclic, so this test is
/// pretty easy to implement recursively.
///
static bool SafeToDestroyConstant(const Constant *C) {
  if (isa<GlobalValue>(C)) return false;

  for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
       ++UI)
    if (const Constant *CU = dyn_cast<Constant>(*UI)) {
      if (!SafeToDestroyConstant(CU)) return false;
    } else
      return false;
  return true;
}


/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
/// structure.  If the global has its address taken, return true to indicate we
/// can't do anything with it.
///
static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
                          SmallPtrSet<const PHINode*, 16> &PHIUsers) {
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
       ++UI) {
    const User *U = *UI;
    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      GS.HasNonInstructionUser = true;

      // If the result of the constantexpr isn't pointer type, then we won't
      // know to expect it in various places.  Just reject early.
      if (!isa<PointerType>(CE->getType())) return true;

      if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
    } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
      if (!GS.HasMultipleAccessingFunctions) {
        const Function *F = I->getParent()->getParent();
        if (GS.AccessingFunction == 0)
          GS.AccessingFunction = F;
        else if (GS.AccessingFunction != F)
          GS.HasMultipleAccessingFunctions = true;
      }
      if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
        GS.isLoaded = true;
        // Don't hack on volatile/atomic loads.
        if (!LI->isSimple()) return true;
      } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
        // Don't allow a store OF the address, only stores TO the address.
        if (SI->getOperand(0) == V) return true;

        // Don't hack on volatile/atomic stores.
        if (!SI->isSimple()) return true;

        // If this is a direct store to the global (i.e., the global is a
        // scalar value, not an aggregate), keep more specific information
        // about stores.
        if (GS.StoredType != GlobalStatus::isStored) {
          if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
                                                           SI->getOperand(1))) {
            Value *StoredVal = SI->getOperand(0);
            if (StoredVal == GV->getInitializer()) {
              if (GS.StoredType < GlobalStatus::isInitializerStored)
                GS.StoredType = GlobalStatus::isInitializerStored;
            } else if (isa<LoadInst>(StoredVal) &&
                       cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
              if (GS.StoredType < GlobalStatus::isInitializerStored)
                GS.StoredType = GlobalStatus::isInitializerStored;
            } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
              GS.StoredType = GlobalStatus::isStoredOnce;
              GS.StoredOnceValue = StoredVal;
            } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
                       GS.StoredOnceValue == StoredVal) {
              // noop.
            } else {
              GS.StoredType = GlobalStatus::isStored;
            }
          } else {
            GS.StoredType = GlobalStatus::isStored;
          }
        }
      } else if (isa<GetElementPtrInst>(I)) {
        if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
      } else if (isa<SelectInst>(I)) {
        if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
      } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
        // PHI nodes we can check just like select or GEP instructions, but we
        // have to be careful about infinite recursion.
        if (PHIUsers.insert(PN))  // Not already visited.
          if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
        GS.HasPHIUser = true;
      } else if (isa<CmpInst>(I)) {
        GS.isCompared = true;
      } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
        if (MTI->isVolatile()) return true;
        if (MTI->getArgOperand(0) == V)
          GS.StoredType = GlobalStatus::isStored;
        if (MTI->getArgOperand(1) == V)
          GS.isLoaded = true;
      } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
        assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
        if (MSI->isVolatile()) return true;
        GS.StoredType = GlobalStatus::isStored;
      } else {
        return true;  // Any other non-load instruction might take address!
      }
    } else if (const Constant *C = dyn_cast<Constant>(U)) {
      GS.HasNonInstructionUser = true;
      // We might have a dead and dangling constant hanging off of here.
      if (!SafeToDestroyConstant(C))
        return true;
    } else {
      GS.HasNonInstructionUser = true;
      // Otherwise must be some other user.
      return true;
    }
  }

  return false;
}
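// As an illustration (hypothetical IR, names invented for this comment): for
//   @g = internal global i32 42
// whose only instruction users, inside a single function @f, are
//   store i32 7, i32* @g
//   %v = load i32* @g
// AnalyzeGlobal returns false and records: isLoaded = true, StoredType =
// isStoredOnce with StoredOnceValue = i32 7, AccessingFunction = @f, and
// isCompared/HasPHIUser/HasNonInstructionUser all false.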
/// CleanupConstantGlobalUsers - We just marked GV constant.  Loop over all
/// users of the global, cleaning up the obvious ones.  This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft.  This
/// returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
  bool Changed = false;
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
    User *U = *UI++;

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (Init) {
        // Replace the load with the initializer.
        LI->replaceAllUsesWith(Init);
        LI->eraseFromParent();
        Changed = true;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // Store must be unreachable or storing Init into the global.
      SI->eraseFromParent();
      Changed = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->getOpcode() == Instruction::GetElementPtr) {
        Constant *SubInit = 0;
        if (Init)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
        Changed |= CleanupConstantGlobalUsers(CE, SubInit);
      } else if (CE->getOpcode() == Instruction::BitCast &&
                 CE->getType()->isPointerTy()) {
        // Pointer cast, delete any stores and memsets to the global.
        Changed |= CleanupConstantGlobalUsers(CE, 0);
      }

      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // Do not transform "gepinst (gep constexpr (GV))" here, because forming
      // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
      // and will invalidate our notion of what Init is.
      Constant *SubInit = 0;
      if (!isa<ConstantExpr>(GEP->getOperand(0))) {
        // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
        ConstantExpr *CE =
          dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
        if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
      }
      Changed |= CleanupConstantGlobalUsers(GEP, SubInit);

      if (GEP->use_empty()) {
        GEP->eraseFromParent();
        Changed = true;
      }
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
      if (MI->getRawDest() == V) {
        MI->eraseFromParent();
        Changed = true;
      }

    } else if (Constant *C = dyn_cast<Constant>(U)) {
      // If we have a chain of dead constantexprs or other things dangling from
      // us, and if they are all dead, nuke them without remorse.
      if (SafeToDestroyConstant(C)) {
        C->destroyConstant();
        // This could have invalidated UI, start over from scratch.
        CleanupConstantGlobalUsers(V, Init);
        return true;
      }
    }
  }
  return Changed;
}
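// Sketch of the cleanup on hypothetical IR: once @g is known to be the
// constant 42,
//   %x = load i32* @g
//   store i32 42, i32* @g
// the load is replaced by i32 42 and erased, and the store (which must be
// either unreachable or redundantly storing the initializer) is deleted.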
/// isSafeSROAElementUse - Return true if the specified instruction is a safe
/// user of a derived expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
  // We might have a dead and dangling constant hanging off of here.
  if (Constant *C = dyn_cast<Constant>(V))
    return SafeToDestroyConstant(C);

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Loads are ok.
  if (isa<LoadInst>(I)) return true;

  // Stores *to* the pointer are ok.
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getOperand(0) != V;

  // Otherwise, it must be a GEP.
  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
  if (GEPI == 0) return false;

  if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
      !cast<Constant>(GEPI->getOperand(1))->isNullValue())
    return false;

  for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
       I != E; ++I)
    if (!isSafeSROAElementUse(*I))
      return false;
  return true;
}


/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
/// Look at it and its uses and decide whether it is safe to SROA this global.
///
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
  if (!isa<GetElementPtrInst>(U) &&
      (!isa<ConstantExpr>(U) ||
       cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
    return false;

  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
  // don't like < 3 operand CE's, and we don't like non-constant integer
  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
  // value of C.
  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
      !cast<Constant>(U->getOperand(1))->isNullValue() ||
      !isa<ConstantInt>(U->getOperand(2)))
    return false;

  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
  ++GEPI;  // Skip over the pointer index.

  // If this is a use of an array allocation, do a bit more checking for sanity.
  if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
    uint64_t NumElements = AT->getNumElements();
    ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));

    // Check to make sure that the index falls within the array.  If not,
    // something funny is going on, so we won't do the optimization.
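    // For instance (hypothetical IR), with @g of type [4 x i32] the use
    //   getelementptr [4 x i32]* @g, i32 0, i32 2     ; index in range: ok
    // is acceptable here, while i32 7 as the second index would be rejected
    // by the bounds check below.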
    //
    if (Idx->getZExtValue() >= NumElements)
      return false;

    // We cannot scalar repl this level of the array unless any array
    // sub-indices are in-range constants.  In particular, consider:
    // A[0][i].  We cannot know that the user isn't doing invalid things like
    // allowing i to index an out-of-range subscript that accesses A[1].
    //
    // Scalar replacing *just* the outer index of the array is probably not
    // going to be a win anyway, so just give up.
    for (++GEPI;  // Skip array index.
         GEPI != E;
         ++GEPI) {
      uint64_t NumElements;
      if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
        NumElements = SubArrayTy->getNumElements();
      else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
        NumElements = SubVectorTy->getNumElements();
      else {
        assert((*GEPI)->isStructTy() &&
               "Indexed GEP type is not array, vector, or struct!");
        continue;
      }

      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
      if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
        return false;
    }
  }

  for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
    if (!isSafeSROAElementUse(*I))
      return false;
  return true;
}

/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
/// is safe for us to perform this transformation.
///
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
       UI != E; ++UI) {
    if (!IsUserOfGlobalSafeForSRA(*UI, GV))
      return false;
  }
  return true;
}


/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
/// variable.  This opens the door for other optimizations by exposing the
/// behavior of the program in a more fine-grained way.  We have determined that
/// this transformation is safe already.  We return the first global variable we
/// insert so that the caller can reprocess it.
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
  // Make sure this global only has simple uses that we can SRA.
  if (!GlobalUsersSafeToSRA(GV))
    return 0;

  assert(GV->hasLocalLinkage() && !GV->isConstant());
  Constant *Init = GV->getInitializer();
  Type *Ty = Init->getType();

  std::vector<GlobalVariable*> NewGlobals;
  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();

  // Get the alignment of the global, either explicit or target-specific.
  unsigned StartAlignment = GV->getAlignment();
  if (StartAlignment == 0)
    StartAlignment = TD.getABITypeAlignment(GV->getType());

  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    NewGlobals.reserve(STy->getNumElements());
    const StructLayout &Layout = *TD.getStructLayout(STy);
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");
      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->isThreadLocal(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      uint64_t FieldOffset = Layout.getElementOffset(i);
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
      if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
        NGV->setAlignment(NewAlign);
    }
  } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    unsigned NumElements = 0;
    if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
      NumElements = ATy->getNumElements();
    else
      NumElements = cast<VectorType>(STy)->getNumElements();

    if (NumElements > 16 && GV->hasNUsesOrMore(16))
      return 0; // It's not worth it.
    NewGlobals.reserve(NumElements);

    uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
    unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
    for (unsigned i = 0, e = NumElements; i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");

      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->isThreadLocal(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
      if (NewAlign > EltAlign)
        NGV->setAlignment(NewAlign);
    }
  }

  if (NewGlobals.empty())
    return 0;

  DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);

  Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(GV->getContext()));

  // Loop over all of the uses of the global, replacing the constantexpr geps
  // with smaller constantexpr geps or direct references.
  while (!GV->use_empty()) {
    User *GEP = GV->use_back();
    assert(((isa<ConstantExpr>(GEP) &&
             cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");

    // Ignore the first index operand, which has to be zero or else the program
    // is quite broken (undefined).  Get the second operand, which is the
    // structure or array index.
    unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
    if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.

    Value *NewPtr = NewGlobals[Val];

    // Form a shorter GEP if needed.
    if (GEP->getNumOperands() > 3) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
        SmallVector<Constant*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
          Idxs.push_back(CE->getOperand(i));
        NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
      } else {
        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
        SmallVector<Value*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
          Idxs.push_back(GEPI->getOperand(i));
        NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
                                           GEPI->getName()+"."+Twine(Val),GEPI);
      }
    }
    GEP->replaceAllUsesWith(NewPtr);

    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
      GEPI->eraseFromParent();
    else
      cast<ConstantExpr>(GEP)->destroyConstant();
  }

  // Delete the old global, now that it is dead.
  Globals.erase(GV);
  ++NumSRA;

  // Loop over the new globals array deleting any globals that are obviously
  // dead.  This can arise due to scalarization of a structure or an array that
  // has elements that are dead.
  unsigned FirstGlobal = 0;
  for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
    if (NewGlobals[i]->use_empty()) {
      Globals.erase(NewGlobals[i]);
      if (FirstGlobal == i) ++FirstGlobal;
    }

  return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
}
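// Overall effect of SRAGlobal, schematically (hypothetical IR):
//   @g = internal global { i32, double } { i32 1, double 2.0 }
//   ... getelementptr ({ i32, double }* @g, i32 0, i32 1) ...
// becomes
//   @g.0 = internal global i32 1
//   @g.1 = internal global double 2.0
// with the GEP use rewritten to refer to @g.1 directly; the ".0"/".1" names
// follow the GV->getName()+"."+Twine(i) scheme used above.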
/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
/// value will trap if the value is dynamically null.  PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
       ++UI) {
    const User *U = *UI;

    if (isa<LoadInst>(U)) {
      // Will trap.
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Storing the value.
      }
    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
      if (CI->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
      if (II->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
      // If we've already seen this phi node, ignore it, it has already been
      // checked.
      if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
        return false;
    } else if (isa<ICmpInst>(U) &&
               isa<ConstantPointerNull>(UI->getOperand(1))) {
      // Ignore icmp X, null
    } else {
      //cerr << "NONTRAPPING USE: " << *U;
      return false;
    }
  }
  return true;
}

/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
/// from GV will trap if the loaded value is null.  Note that this also permits
/// comparisons of the loaded value against null, as a special case.
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
       UI != E; ++UI) {
    const User *U = *UI;

    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      SmallPtrSet<const PHINode*, 8> PHIs;
      if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
        return false;
    } else if (isa<StoreInst>(U)) {
      // Ignore stores to the global.
    } else {
      // We don't know or understand this user, bail out.
      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
      return false;
    }
  }
  return true;
}
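// The intuition, on hypothetical IR: if every use of %p below either traps on
// null or merely compares against null,
//   %p = load i32** @g
//   %v = load i32* %p            ; would trap if %p were null
//   %c = icmp eq i32* %p, null   ; permitted special case
// then no well-defined execution can observe @g still holding its null
// initializer at these points.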
static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  bool Changed = false;
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallSite CS(I);
      if (CS.getCalledValue() == V) {
        // Calling through the pointer!  Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        CS.setCalledFunction(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
          if (CS.getArgument(i) == V) {
            PassedAsArg = true;
            CS.setArgument(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also.  Be careful to not invalidate UI!
          UI = V->use_begin();
        }
      }
    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
                                ConstantExpr::getCast(CI->getOpcode(),
                                                      NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant*, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
                          ConstantExpr::getGetElementPtr(NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}


/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
/// value stored into it.  If there are uses of the loaded value that would trap
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null value; optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of uses of the stored value.
  for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
    User *GlobalUser = *GUI++;
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) &&
             "Only expect load and stores!");
    }
  }

  if (Changed) {
    DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
    CleanupConstantGlobalUsers(GV, 0);
    if (GV->use_empty()) {
      GV->eraseFromParent();
      ++NumDeleted;
    }
    Changed = true;
  }
  return Changed;
}

/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
static void ConstantPropUsersOf(Value *V) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
      if (Constant *NewC = ConstantFoldInstruction(I)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        I->eraseFromParent();
      }
}
/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
/// variable, and transforms the program as if it always contained the result
/// of the specified malloc.  Because it is always the result of the specified
/// malloc, there is no reason to actually DO the malloc.  Instead, turn the
/// malloc into a global, and rewrite any loads of GV as uses of the new global.
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                                     CallInst *CI,
                                                     Type *AllocTy,
                                                     ConstantInt *NElements,
                                                     TargetData *TD) {
  DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');

  Type *GlobalType;
  if (NElements->getZExtValue() == 1)
    GlobalType = AllocTy;
  else
    // If we have an array allocation, the global variable is of an array.
    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());

  // Create the new global variable.  The contents of the malloc'd memory is
  // undefined, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
                                             GlobalType, false,
                                             GlobalValue::InternalLinkage,
                                             UndefValue::get(GlobalType),
                                             GV->getName()+".body",
                                             GV,
                                             GV->isThreadLocal());

  // If there are bitcast users of the malloc (which is typical, usually we have
  // a malloc + bitcast) then replace them with uses of the new global.  Update
  // other users to use the global as well.
  BitCastInst *TheBC = 0;
  while (!CI->use_empty()) {
    Instruction *User = cast<Instruction>(CI->use_back());
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (BCI->getType() == NewGV->getType()) {
        BCI->replaceAllUsesWith(NewGV);
        BCI->eraseFromParent();
      } else {
        BCI->setOperand(0, NewGV);
      }
    } else {
      if (TheBC == 0)
        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
      User->replaceUsesOfWith(CI, TheBC);
    }
  }

  Constant *RepValue = NewGV;
  if (NewGV->getType() != GV->getType()->getElementType())
    RepValue = ConstantExpr::getBitCast(RepValue,
                                        GV->getType()->getElementType());

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool =
    new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
                       GlobalValue::InternalLinkage,
                       ConstantInt::getFalse(GV->getContext()),
                       GV->getName()+".init", GV->isThreadLocal());
  bool InitBoolUsed = false;

  // Loop over all uses of GV, processing them in turn.
  while (!GV->use_empty()) {
    if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
      // The global is initialized when the store to it occurs.
      new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI);
      SI->eraseFromParent();
      continue;
    }

    LoadInst *LI = cast<LoadInst>(GV->use_back());
    while (!LI->use_empty()) {
      Use &LoadUse = LI->use_begin().getUse();
      if (!isa<ICmpInst>(LoadUse.getUser())) {
        LoadUse = RepValue;
        continue;
      }

      ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
      // Replace the cmp X, 0 with a use of the bool value.
      Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
      InitBoolUsed = true;
      switch (ICI->getPredicate()) {
      default: llvm_unreachable("Unknown ICmp Predicate!");
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_SLT:   // X < null -> always false
        LV = ConstantInt::getFalse(GV->getContext());
        break;
      case ICmpInst::ICMP_ULE:
      case ICmpInst::ICMP_SLE:
      case ICmpInst::ICMP_EQ:
        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
        break;
      case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_UGE:
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_UGT:
      case ICmpInst::ICMP_SGT:
        break;  // no change.
      }
      ICI->replaceAllUsesWith(LV);
      ICI->eraseFromParent();
    }
    LI->eraseFromParent();
  }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty())  // Delete initializations
      cast<StoreInst>(InitBool->use_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->getGlobalList().insert(GV, InitBool);

  // Now the GV is dead, nuke it and the malloc.
  GV->eraseFromParent();
  CI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
  // constant prop them.  This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  ConstantPropUsersOf(NewGV);
  if (RepValue != NewGV)
    ConstantPropUsersOf(RepValue);

  return NewGV;
}
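// Before/after sketch (hypothetical IR, single-element allocation):
//   @g = internal global i32* null
//   %m = call i8* @malloc(i64 4)
//   %p = bitcast i8* %m to i32*
//   store i32* %p, i32** @g
// becomes, roughly,
//   @g.body = internal global i32 undef
// with loads of @g rewritten to use @g.body, plus an i1 @g.init flag (kept
// only if @g was compared against null) recording whether the store has run.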
/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
/// to make sure that there are no complex uses of V.  We permit simple things
/// like dereferencing the pointer, but not storing through the address, unless
/// it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
                                           const GlobalVariable *GV,
                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
       UI != E; ++UI) {
    const Instruction *Inst = cast<Instruction>(*UI);

    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
      continue; // Fine, ignore.
    }

    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
        return false;  // Storing the pointer itself... bad.
      continue; // Otherwise, storing through it, or storing into GV... fine.
    }

    // Must index into the array and into the struct.
    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
        return false;
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
      // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
      // cycles.
      if (PHIs.insert(PN))
        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
          return false;
      continue;
    }

    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
        return false;
      continue;
    }

    return false;
  }
  return true;
}

/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
/// somewhere.  Transform all uses of the allocation into loads from the
/// global and uses of the resultant pointer.  Further, delete the store into
/// GV.  This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
                                          GlobalVariable *GV) {
  while (!Alloc->use_empty()) {
    Instruction *U = cast<Instruction>(*Alloc->use_begin());
    Instruction *InsertPt = U;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // If this is the store of the allocation into the global, remove it.
      if (SI->getOperand(1) == GV) {
        SI->eraseFromParent();
        continue;
      }
    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
      // Insert the load in the corresponding predecessor, not right before the
      // PHI.
      InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
    } else if (isa<BitCastInst>(U)) {
      // Must be bitcast between the malloc and store to initialize the global.
      ReplaceUsesOfMallocWithGlobal(U, GV);
      U->eraseFromParent();
      continue;
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // If this is a "GEP bitcast" and the user is a store to the global, then
      // just process it as a bitcast.
      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
          if (SI->getOperand(1) == GV) {
            // Must be bitcast GEP between the malloc and store to initialize
            // the global.
            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
            GEPI->eraseFromParent();
            continue;
          }
    }

    // Insert a load from the global, and use it instead of the malloc.
    Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
    U->replaceUsesOfWith(Alloc, NL);
  }
}
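// After ReplaceUsesOfMallocWithGlobal, a pattern like (hypothetical IR)
//   store i8* %malloc, i8** @g
//   %u = getelementptr i8* %malloc, ...
// has the store erased and each surviving use of %malloc rewritten to use a
// fresh 'load i8** @g' inserted just before the user (or at the end of the
// predecessor block, for PHI users).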
/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
/// of a load) are simple enough to perform heap SRA on.  This permits GEP's
/// that index through the array and struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
                         SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
                         SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
  // We permit two users of the load: setcc comparing against the null
  // pointer, and a getelementptr of a specific form.
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
       ++UI) {
    const Instruction *User = cast<Instruction>(*UI);

    // Comparison against null is ok.
    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
        return false;
      continue;
    }

    // getelementptr is also ok, but only a simple form.
    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
      // Must index into the array and into the struct.
      if (GEPI->getNumOperands() < 3)
        return false;

      // Otherwise the GEP is ok.
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(User)) {
      if (!LoadUsingPHIsPerLoad.insert(PN))
        // This means some phi nodes are dependent on each other.
        // Avoid infinite looping!
        return false;
      if (!LoadUsingPHIs.insert(PN))
        // If we have already analyzed this PHI, then it is safe.
        continue;

      // Make sure all uses of the PHI are simple enough to transform.
      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
        return false;

      continue;
    }

    // Otherwise we don't know what this is, not ok.
    return false;
  }

  return true;
}


/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
/// GV are simple enough to perform HeapSRA, return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
                                                    Instruction *StoredVal) {
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
       UI != E; ++UI)
    if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
                                          LoadUsingPHIsPerLoad))
        return false;
      LoadUsingPHIsPerLoad.clear();
    }

  // If we reach here, we know that all uses of the loads and transitive uses
  // (through PHI nodes) are simple enough to transform.  However, we don't know
  // that all inputs to the PHI nodes are in the same equivalence sets.
  // Check to verify that all operands of the PHIs are either PHIs that can be
  // transformed, loads from GV, or the stored value itself.
  for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
       , E = LoadUsingPHIs.end(); I != E; ++I) {
    const PHINode *PN = *I;
    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
      Value *InVal = PN->getIncomingValue(op);

      // PHI of the stored value itself is ok.
      if (InVal == StoredVal) continue;

      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
        // One of the PHIs in our set is (optimistically) ok.
        if (LoadUsingPHIs.count(InPN))
          continue;
        return false;
      }

      // Load from GV is ok.
      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
        if (LI->getOperand(0) == GV)
          continue;

      // UNDEF? NULL?

      // Anything else is rejected.
      return false;
    }
  }

  return true;
}
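// Shape of an acceptable use web, roughly (hypothetical IR, %pair* loads):
//   %s   = load %pair** @g
//   %c   = icmp eq %pair* %s, null                 ; ok: compare with null
//   %f   = getelementptr %pair* %s, i32 0, i32 1   ; ok: array + field index
//   %phi = phi %pair* [ %s, %bb1 ], [ %m, %bb2 ]   ; ok only if every input
//                                                  ; is a load of @g, such a
//                                                  ; PHI, or the stored value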
static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];

  if (FieldNo >= FieldVals.size())
    FieldVals.resize(FieldNo+1);

  // If we already have this value, just reuse the previously scalarized
  // version.
  if (Value *FieldVal = FieldVals[FieldNo])
    return FieldVal;

  // Depending on what instruction this is, we have several cases.
  Value *Result;
  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
    // This is a scalarized version of the load from the global.  Just create
    // a new Load of the scalarized global.
    Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
                                           InsertedScalarizedValues,
                                           PHIsToRewrite),
                          LI->getName()+".f"+Twine(FieldNo), LI);
  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
    // PN's type is pointer to struct.  Make a new PHI of pointer to struct
    // field.
    StructType *ST =
      cast<StructType>(cast<PointerType>(PN->getType())->getElementType());

    PHINode *NewPN =
      PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
                      PN->getNumIncomingValues(),
                      PN->getName()+".f"+Twine(FieldNo), PN);
    Result = NewPN;
    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
  } else {
    llvm_unreachable("Unknown usable value");
  }

  return FieldVals[FieldNo] = Result;
}

/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
/// the load, rewrite the derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  // If this is a comparison against null, handle it.
  if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
    assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
    // If we have a setcc of the loaded pointer, we can use a setcc of any
    // field.
    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
                                   InsertedScalarizedValues, PHIsToRewrite);

    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
                              Constant::getNullValue(NPtr->getType()),
                              SCI->getName());
    SCI->replaceAllUsesWith(New);
    SCI->eraseFromParent();
    return;
  }

  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
    assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
           && "Unexpected GEPI!");

    // Load the pointer for this field.
    unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
                                     InsertedScalarizedValues, PHIsToRewrite);

    // Create the new GEP idx vector.
    SmallVector<Value*, 8> GEPIdx;
    GEPIdx.push_back(GEPI->getOperand(1));
    GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());

    Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
                                             GEPI->getName(), GEPI);
    GEPI->replaceAllUsesWith(NGEPI);
    GEPI->eraseFromParent();
    return;
  }

  // Recursively transform the users of PHI nodes.  This will lazily create the
  // PHIs that are needed for individual elements.  Keep track of what PHIs we
  // see in InsertedScalarizedValues so that we don't get infinite loops (very
  // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
  // already been seen first by another load, so its uses have already been
  // processed.
  PHINode *PN = cast<PHINode>(LoadUser);
  if (!InsertedScalarizedValues.insert(std::make_pair(PN,
                                              std::vector<Value*>())).second)
    return;

  // If this is the first time we've seen this PHI, recursively process all
  // users.
  for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }
}
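// The field-GEP rewrite, schematically (hypothetical IR): with @g split into
// per-field globals @g.f0, @g.f1, ... a use chain such as
//   %p = load %pair** @g
//   %a = getelementptr %pair* %p, i32 %i, i32 1
// becomes
//   %p.f1 = load double** @g.f1
//   %a    = getelementptr double* %p.f1, i32 %i
// i.e. the struct-field index selects the per-field global and is dropped
// from the index list.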
/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
/// is a value loaded from the global.  Eliminate all uses of Ptr, making them
/// use FieldGlobals instead.  All uses of loaded values satisfy
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
       UI != E; ) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }

  if (Load->use_empty()) {
    Load->eraseFromParent();
    InsertedScalarizedValues.erase(Load);
  }
}

/// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
                                            Value *NElems, TargetData *TD) {
  DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
  Type *MAT = getMallocAllocatedType(CI);
  StructType *STy = cast<StructType>(MAT);

  // There is guaranteed to be at least one use of the malloc (storing
  // it into GV).  If there are other uses, change them to be uses of
  // the global to simplify later code.  This also deletes the store
  // into GV.
  ReplaceUsesOfMallocWithGlobal(CI, GV);

  // Okay, at this point, there are no users of the malloc.  Insert N
  // new mallocs at the same place as CI, and N globals.
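  // For a malloc of n x { i32, double } (hypothetical element type), the loop
  // below produces one null-initialized global and one malloc per field:
  //   @g.f0 = internal global i32* null     ; will hold an array of field 0
  //   @g.f1 = internal global double* null  ; will hold an array of field 1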
  std::vector<Value*> FieldGlobals;
  std::vector<Value*> FieldMallocs;

  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e; ++FieldNo) {
    Type *FieldTy = STy->getElementType(FieldNo);
    PointerType *PFieldTy = PointerType::getUnqual(FieldTy);

    GlobalVariable *NGV =
      new GlobalVariable(*GV->getParent(),
                         PFieldTy, false, GlobalValue::InternalLinkage,
                         Constant::getNullValue(PFieldTy),
                         GV->getName() + ".f" + Twine(FieldNo), GV,
                         GV->isThreadLocal());
    FieldGlobals.push_back(NGV);

    unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
    if (StructType *ST = dyn_cast<StructType>(FieldTy))
      TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
    Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
                                        ConstantInt::get(IntPtrTy, TypeSize),
                                        NElems, 0,
                                        CI->getName() + ".f" + Twine(FieldNo));
    FieldMallocs.push_back(NMI);
    new StoreInst(NMI, NGV, CI);
  }

  // The tricky aspect of this transformation is handling the case when malloc
  // fails.  In the original code, malloc failing would set the result pointer
  // of malloc to null.  In this case, some mallocs could succeed and others
  // could fail.  As such, we emit code that looks like this:
  //    F0 = malloc(field0)
  //    F1 = malloc(field1)
  //    F2 = malloc(field2)
  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
  //      if (F0) { free(F0); F0 = 0; }
  //      if (F1) { free(F1); F1 = 0; }
  //      if (F2) { free(F2); F2 = 0; }
  //    }
  // The malloc can also fail if its argument is too large.
  Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
                                  ConstantZero, "isneg");
  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
                             Constant::getNullValue(FieldMallocs[i]->getType()),
                               "isnull");
    RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
  }

  // Split the basic block at the old malloc.
  BasicBlock *OrigBB = CI->getParent();
  BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");

  // Create the block to check the first condition.  Put all these blocks at the
  // end of the function as they are unlikely to be executed.
  BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
                                                "malloc_ret_null",
                                                OrigBB->getParent());

  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
  // branch on RunningOr.
  OrigBB->getTerminator()->eraseFromParent();
  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);

  // Within the NullPtrBlock, we need to emit a comparison and branch for each
  // pointer, because some may be null while others are not.
  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
                              Constant::getNullValue(GVVal->getType()));
    BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
                                               OrigBB->getParent());
    BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
                                               OrigBB->getParent());
    Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
                                         Cmp, NullPtrBlock);

    // Fill in FreeBlock.
    CallInst::CreateFree(GVVal, BI);
    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
                  FreeBlock);
    BranchInst::Create(NextBlock, FreeBlock);

    NullPtrBlock = NextBlock;
  }

  BranchInst::Create(ContBB, NullPtrBlock);

  // CI is no longer needed, remove it.
  CI->eraseFromParent();

  /// InsertedScalarizedLoads - As we process loads, if we can't immediately
  /// update all uses of the load, keep track of what scalarized loads are
  /// inserted for a given load.
  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
  InsertedScalarizedValues[GV] = FieldGlobals;

  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;

  // Okay, the malloc site is completely handled.  All of the uses of GV are now
  // loads, and all uses of those loads are simple.  Rewrite them to use loads
  // of the per-field globals instead.
  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
      continue;
    }

    // Must be a store of null.
    StoreInst *SI = cast<StoreInst>(User);
    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
           "Unexpected heap-sra user!");

    // Insert a store of null into each global.
    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
      PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
      Constant *Null = Constant::getNullValue(PT->getElementType());
      new StoreInst(Null, FieldGlobals[i], SI);
    }
    // Erase the original store.
    SI->eraseFromParent();
  }

  // While we have PHIs that are interesting to rewrite, do it.
  while (!PHIsToRewrite.empty()) {
    PHINode *PN = PHIsToRewrite.back().first;
    unsigned FieldNo = PHIsToRewrite.back().second;
    PHIsToRewrite.pop_back();
    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");

    // Add all the incoming values.  This can materialize more phis.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *InVal = PN->getIncomingValue(i);
      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
                               PHIsToRewrite);
      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
    }
  }

  // Drop all inter-phi links and any loads that made it this far.
  for (DenseMap<Value*, std::vector<Value*> >::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->dropAllReferences();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->dropAllReferences();
  }

  // Delete all the phis and loads now that inter-references are dead.
  for (DenseMap<Value*, std::vector<Value*> >::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->eraseFromParent();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->eraseFromParent();
  }

  // The old global is now dead, remove it.
  GV->eraseFromParent();

  ++NumHeapSRA;
  return cast<GlobalVariable>(FieldGlobals[0]);
}
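// Net effect, schematically (hypothetical IR): a single
//   %m = call i8* @malloc(...)          ; array of { i32, double }
// stored into @g is replaced by parallel per-field arrays reached through
// @g.f0 and @g.f1, with the compensation blocks emitted above freeing any
// fields that did allocate when one of the field mallocs fails.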
/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
/// pointer global variable with a single value stored to it that is a malloc
/// or cast of malloc.
static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
                                               CallInst *CI,
                                               Type *AllocTy,
                                               Module::global_iterator &GVI,
                                               TargetData *TD) {
  if (!TD)
    return false;

  // If this is a malloc of an abstract type, don't touch it.
  if (!AllocTy->isSized())
    return false;

  // We can't optimize this global unless all uses of it are *known* to be
  // of the malloc value, not of the null initializer value (consider a use
  // that compares the global's value against zero to see if the malloc has
  // been reached).  To do this, we check to see if all uses of the global
  // would trap if the global were null: this proves that they must all
  // happen after the malloc.
  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
    return false;

  // We can't optimize this if the malloc itself is used in a complex way,
  // for example, being stored into multiple globals.  This allows the
  // malloc to be stored into the specified global, loaded, icmp'd, and
  // GEP'd.  These are all uses we can rewrite in terms of the global.
  SmallPtrSet<const PHINode*, 8> PHIs;
  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
    return false;

  // If we have a global that is only initialized with a fixed size malloc,
  // transform the program to use global memory instead of malloc'd memory.
  // This eliminates dynamic allocation, avoids an indirection accessing the
  // data, and exposes the resultant global to further GlobalOpt.
  // We cannot optimize the malloc if we cannot determine malloc array size.
  Value *NElems = getMallocArraySize(CI, TD, true);
  if (!NElems)
    return false;

  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
    // Restrict this transformation to only working on small allocations
    // (2048 bytes currently), as we don't want to introduce a 16M global or
    // something.
    if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
      GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
      return true;
    }

  // If the allocation is an array of structures, consider transforming this
  // into multiple malloc'd arrays, one for each field.  This is basically
  // SRoA for malloc'd memory.

  // If this is an allocation of a fixed size array of structs, analyze as a
  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
      AllocTy = AT->getElementType();

  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
  if (!AllocSTy)
    return false;

  // If the structure has an unreasonable number of fields, leave it
  // alone.
  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {

    // If this is a fixed size array, transform the Malloc to be an alloc of
malloc [100 x struct],1 -> malloc struct, 100 1523 if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { 1524 Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); 1525 unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); 1526 Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); 1527 Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); 1528 Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, 1529 AllocSize, NumElements, 1530 0, CI->getName()); 1531 Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); 1532 CI->replaceAllUsesWith(Cast); 1533 CI->eraseFromParent(); 1534 CI = dyn_cast<BitCastInst>(Malloc) ? 1535 extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); 1536 } 1537 1538 GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD); 1539 return true; 1540 } 1541 1542 return false; 1543} 1544 1545// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge 1546// that only one value (besides its initializer) is ever stored to the global. 1547static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, 1548 Module::global_iterator &GVI, 1549 TargetData *TD) { 1550 // Ignore no-op GEPs and bitcasts. 1551 StoredOnceVal = StoredOnceVal->stripPointerCasts(); 1552 1553 // If we are dealing with a pointer global that is initialized to null and 1554 // only has one (non-null) value stored into it, then we can optimize any 1555 // users of the loaded value (often calls and loads) that would trap if the 1556 // value was null. 1557 if (GV->getInitializer()->getType()->isPointerTy() && 1558 GV->getInitializer()->isNullValue()) { 1559 if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { 1560 if (GV->getInitializer()->getType() != SOVC->getType()) 1561 SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); 1562 1563 // Optimize away any trapping uses of the loaded value. 1564 if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) 1565 return true; 1566 } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { 1567 Type *MallocType = getMallocAllocatedType(CI); 1568 if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, 1569 GVI, TD)) 1570 return true; 1571 } 1572 } 1573 1574 return false; 1575} 1576 1577/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only 1578/// two values ever stored into GV are its initializer and OtherVal. See if we 1579/// can shrink the global into a boolean and select between the two values 1580/// whenever it is used. This exposes the values to other scalar optimizations. 1581static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { 1582 Type *GVElType = GV->getType()->getElementType(); 1583 1584 // If GVElType is already i1, it is already shrunk. If the type of the GV is 1585 // an FP value, pointer or vector, don't do this optimization because a select 1586 // between them is very expensive and unlikely to lead to later 1587 // simplification. In these cases, we typically end up with "cond ? v1 : v2" 1588 // where v1 and v2 both require constant pool loads, a big loss. 1589 if (GVElType == Type::getInt1Ty(GV->getContext()) || 1590 GVElType->isFloatingPointTy() || 1591 GVElType->isPointerTy() || GVElType->isVectorTy()) 1592 return false; 1593 1594 // Walk the use list of the global seeing if all the uses are load or store. 1595 // If there is anything else, bail out. 
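  // As a hedged sketch of the rewrite this function performs (names
  // hypothetical): given
  //    @flag = internal global i32 0
  // whose only stored values are 0 and 123, the global becomes
  //    @flag.b = internal global i1 false
  // stores become stores of 'i1 true'/'i1 false', and each load turns into
  //    %b = load i1* @flag.b
  //    %v = select i1 %b, i32 123, i32 0
  // (or a zext instead of the select when the two values are exactly 1 and 0).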
1596  for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I) {
1597    User *U = *I;
1598    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
1599      return false;
1600  }
1601
1602  DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV);
1603
1604  // Create the new global, initializing it to false.
1605  GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
1606                                             false,
1607                                             GlobalValue::InternalLinkage,
1608                                             ConstantInt::getFalse(GV->getContext()),
1609                                             GV->getName()+".b",
1610                                             GV->isThreadLocal());
1611  GV->getParent()->getGlobalList().insert(GV, NewGV);
1612
1613  Constant *InitVal = GV->getInitializer();
1614  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
1615         "No reason to shrink to bool!");
1616
1617  // If initialized to zero and storing one into the global, we can use a cast
1618  // instead of a select to synthesize the desired value.
1619  bool IsOneZero = false;
1620  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
1621    IsOneZero = InitVal->isNullValue() && CI->isOne();
1622
1623  while (!GV->use_empty()) {
1624    Instruction *UI = cast<Instruction>(GV->use_back());
1625    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
1626      // Change the store into a boolean store.
1627      bool StoringOther = SI->getOperand(0) == OtherVal;
1628      // Only do this if we weren't storing a loaded value.
1629      Value *StoreVal;
1630      if (StoringOther || SI->getOperand(0) == InitVal)
1631        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
1632                                    StoringOther);
1633      else {
1634        // Otherwise, we are storing a previously loaded copy.  To do this,
1635        // change the copy from copying the original value to just copying the
1636        // bool.
1637        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
1638
1639        // If we've already replaced the input, StoredVal will be a cast or
1640        // select instruction.  If not, it will be a load of the original
1641        // global.
1642        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
1643          assert(LI->getOperand(0) == GV && "Not a copy!");
1644          // Insert a new load, to preserve the saved value.
1645          StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI);
1646        } else {
1647          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
1648                 "This is not a form that we understand!");
1649          StoreVal = StoredVal->getOperand(0);
1650          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
1651        }
1652      }
1653      new StoreInst(StoreVal, NewGV, SI);
1654    } else {
1655      // Change the load into a load of bool then a select.
1656      LoadInst *LI = cast<LoadInst>(UI);
1657      LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI);
1658      Value *NSI;
1659      if (IsOneZero)
1660        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
1661      else
1662        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
1663      NSI->takeName(LI);
1664      LI->replaceAllUsesWith(NSI);
1665    }
1666    UI->eraseFromParent();
1667  }
1668
1669  GV->eraseFromParent();
1670  return true;
1671}
1672
1673
1674/// ProcessGlobal - Analyze the specified global variable and optimize it if
1675/// possible.  If we make a change, return true.
1676bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
1677                              Module::global_iterator &GVI) {
1678  if (!GV->hasLocalLinkage())
1679    return false;
1680
1681  // Do more involved optimizations if the global is internal.
1682  GV->removeDeadConstantUsers();
1683
1684  if (GV->use_empty()) {
1685    DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
1686    GV->eraseFromParent();
1687    ++NumDeleted;
1688    return true;
1689  }
1690
1691  SmallPtrSet<const PHINode*, 16> PHIUsers;
1692  GlobalStatus GS;
1693
1694  if (AnalyzeGlobal(GV, GS, PHIUsers))
1695    return false;
1696
1697  if (!GS.isCompared && !GV->hasUnnamedAddr()) {
1698    GV->setUnnamedAddr(true);
1699    ++NumUnnamed;
1700  }
1701
1702  if (GV->isConstant() || !GV->hasInitializer())
1703    return false;
1704
1705  return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
1706}
1707
1708/// ProcessInternalGlobal - Analyze the specified internal global variable and
1709/// optimize it if possible.  If we make a change, return true.
1710bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
1711                                      Module::global_iterator &GVI,
1712                                      const SmallPtrSet<const PHINode*, 16> &PHIUsers,
1713                                      const GlobalStatus &GS) {
1714  // If this is a first class global and has only one accessing function,
1715  // and that function is main (which we know is not recursive and runs
1716  // exactly once), we can replace the global with a local alloca in that
1717  // function.
1718  //
1719  // NOTE: It doesn't make sense to promote non single-value types since we
1720  // are just replacing static memory with stack memory.
1721  //
1722  // If the global is in a different address space, don't bring it onto the stack.
1723  if (!GS.HasMultipleAccessingFunctions &&
1724      GS.AccessingFunction && !GS.HasNonInstructionUser &&
1725      GV->getType()->getElementType()->isSingleValueType() &&
1726      GS.AccessingFunction->getName() == "main" &&
1727      GS.AccessingFunction->hasExternalLinkage() &&
1728      GV->getType()->getAddressSpace() == 0) {
1729    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
1730    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
1731                                                    ->getEntryBlock().begin());
1732    Type *ElemTy = GV->getType()->getElementType();
1733    // FIXME: Pass Global's alignment when globals have alignment
1734    AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
1735    if (!isa<UndefValue>(GV->getInitializer()))
1736      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
1737
1738    GV->replaceAllUsesWith(Alloca);
1739    GV->eraseFromParent();
1740    ++NumLocalized;
1741    return true;
1742  }
1743
1744  // If the global is never loaded (but may be stored to), it is dead.
1745  // Delete it now.
1746  if (!GS.isLoaded) {
1747    DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
1748
1749    // Delete any stores we can find to the global.  We may not be able to
1750    // make it completely dead though.
1751    bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
1752
1753    // If the global is dead now, delete it.
1754    if (GV->use_empty()) {
1755      GV->eraseFromParent();
1756      ++NumDeleted;
1757      Changed = true;
1758    }
1759    return Changed;
1760
1761  } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
1762    DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
1763    GV->setConstant(true);
1764
1765    // Clean up any obviously simplifiable users now.
1766    CleanupConstantGlobalUsers(GV, GV->getInitializer());
1767
1768    // If the global is dead now, just nuke it.
1769 if (GV->use_empty()) { 1770 DEBUG(dbgs() << " *** Marking constant allowed us to simplify " 1771 << "all users and delete global!\n"); 1772 GV->eraseFromParent(); 1773 ++NumDeleted; 1774 } 1775 1776 ++NumMarked; 1777 return true; 1778 } else if (!GV->getInitializer()->getType()->isSingleValueType()) { 1779 if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) 1780 if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { 1781 GVI = FirstNewGV; // Don't skip the newly produced globals! 1782 return true; 1783 } 1784 } else if (GS.StoredType == GlobalStatus::isStoredOnce) { 1785 // If the initial value for the global was an undef value, and if only 1786 // one other value was stored into it, we can just change the 1787 // initializer to be the stored value, then delete all stores to the 1788 // global. This allows us to mark it constant. 1789 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) 1790 if (isa<UndefValue>(GV->getInitializer())) { 1791 // Change the initial value here. 1792 GV->setInitializer(SOVConstant); 1793 1794 // Clean up any obviously simplifiable users now. 1795 CleanupConstantGlobalUsers(GV, GV->getInitializer()); 1796 1797 if (GV->use_empty()) { 1798 DEBUG(dbgs() << " *** Substituting initializer allowed us to " 1799 << "simplify all users and delete global!\n"); 1800 GV->eraseFromParent(); 1801 ++NumDeleted; 1802 } else { 1803 GVI = GV; 1804 } 1805 ++NumSubstitute; 1806 return true; 1807 } 1808 1809 // Try to optimize globals based on the knowledge that only one value 1810 // (besides its initializer) is ever stored to the global. 1811 if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, 1812 getAnalysisIfAvailable<TargetData>())) 1813 return true; 1814 1815 // Otherwise, if the global was not a boolean, we can shrink it to be a 1816 // boolean. 1817 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) 1818 if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { 1819 ++NumShrunkToBool; 1820 return true; 1821 } 1822 } 1823 1824 return false; 1825} 1826 1827/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified 1828/// function, changing them to FastCC. 1829static void ChangeCalleesToFastCall(Function *F) { 1830 for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ 1831 CallSite User(cast<Instruction>(*UI)); 1832 User.setCallingConv(CallingConv::Fast); 1833 } 1834} 1835 1836static AttrListPtr StripNest(const AttrListPtr &Attrs) { 1837 for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { 1838 if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0) 1839 continue; 1840 1841 // There can be only one. 1842 return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest); 1843 } 1844 1845 return Attrs; 1846} 1847 1848static void RemoveNestAttribute(Function *F) { 1849 F->setAttributes(StripNest(F->getAttributes())); 1850 for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ 1851 CallSite User(cast<Instruction>(*UI)); 1852 User.setAttributes(StripNest(User.getAttributes())); 1853 } 1854} 1855 1856bool GlobalOpt::OptimizeFunctions(Module &M) { 1857 bool Changed = false; 1858 // Optimize functions. 1859 for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { 1860 Function *F = FI++; 1861 // Functions without names cannot be referenced outside this module. 
1862 if (!F->hasName() && !F->isDeclaration()) 1863 F->setLinkage(GlobalValue::InternalLinkage); 1864 F->removeDeadConstantUsers(); 1865 if (F->isDefTriviallyDead()) { 1866 F->eraseFromParent(); 1867 Changed = true; 1868 ++NumFnDeleted; 1869 } else if (F->hasLocalLinkage()) { 1870 if (F->getCallingConv() == CallingConv::C && !F->isVarArg() && 1871 !F->hasAddressTaken()) { 1872 // If this function has C calling conventions, is not a varargs 1873 // function, and is only called directly, promote it to use the Fast 1874 // calling convention. 1875 F->setCallingConv(CallingConv::Fast); 1876 ChangeCalleesToFastCall(F); 1877 ++NumFastCallFns; 1878 Changed = true; 1879 } 1880 1881 if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && 1882 !F->hasAddressTaken()) { 1883 // The function is not used by a trampoline intrinsic, so it is safe 1884 // to remove the 'nest' attribute. 1885 RemoveNestAttribute(F); 1886 ++NumNestRemoved; 1887 Changed = true; 1888 } 1889 } 1890 } 1891 return Changed; 1892} 1893 1894bool GlobalOpt::OptimizeGlobalVars(Module &M) { 1895 bool Changed = false; 1896 for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); 1897 GVI != E; ) { 1898 GlobalVariable *GV = GVI++; 1899 // Global variables without names cannot be referenced outside this module. 1900 if (!GV->hasName() && !GV->isDeclaration()) 1901 GV->setLinkage(GlobalValue::InternalLinkage); 1902 // Simplify the initializer. 1903 if (GV->hasInitializer()) 1904 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { 1905 TargetData *TD = getAnalysisIfAvailable<TargetData>(); 1906 TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); 1907 Constant *New = ConstantFoldConstantExpression(CE, TD, TLI); 1908 if (New && New != CE) 1909 GV->setInitializer(New); 1910 } 1911 1912 Changed |= ProcessGlobal(GV, GVI); 1913 } 1914 return Changed; 1915} 1916 1917/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all 1918/// initializers have an init priority of 65535. 1919GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { 1920 GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); 1921 if (GV == 0) return 0; 1922 1923 // Verify that the initializer is simple enough for us to handle. We are 1924 // only allowed to optimize the initializer if it is unique. 1925 if (!GV->hasUniqueInitializer()) return 0; 1926 1927 if (isa<ConstantAggregateZero>(GV->getInitializer())) 1928 return GV; 1929 ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); 1930 1931 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { 1932 if (isa<ConstantAggregateZero>(*i)) 1933 continue; 1934 ConstantStruct *CS = cast<ConstantStruct>(*i); 1935 if (isa<ConstantPointerNull>(CS->getOperand(1))) 1936 continue; 1937 1938 // Must have a function or null ptr. 1939 if (!isa<Function>(CS->getOperand(1))) 1940 return 0; 1941 1942 // Init priority must be standard. 1943 ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0)); 1944 if (CI->getZExtValue() != 65535) 1945 return 0; 1946 } 1947 1948 return GV; 1949} 1950 1951/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, 1952/// return a list of the functions and null terminator as a vector. 
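/// A list this code can understand has roughly the following shape (a sketch,
/// with hypothetical ctor names):
///   @llvm.global_ctors = appending global [2 x { i32, void ()* }]
///     [{ i32, void ()* } { i32 65535, void ()* @ctor_a },
///      { i32, void ()* } { i32 65535, void ()* @ctor_b }]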
1953static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { 1954 if (GV->getInitializer()->isNullValue()) 1955 return std::vector<Function*>(); 1956 ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); 1957 std::vector<Function*> Result; 1958 Result.reserve(CA->getNumOperands()); 1959 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { 1960 ConstantStruct *CS = cast<ConstantStruct>(*i); 1961 Result.push_back(dyn_cast<Function>(CS->getOperand(1))); 1962 } 1963 return Result; 1964} 1965 1966/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the 1967/// specified array, returning the new global to use. 1968static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, 1969 const std::vector<Function*> &Ctors) { 1970 // If we made a change, reassemble the initializer list. 1971 Constant *CSVals[2]; 1972 CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535); 1973 CSVals[1] = 0; 1974 1975 StructType *StructTy = 1976 cast <StructType>( 1977 cast<ArrayType>(GCL->getType()->getElementType())->getElementType()); 1978 1979 // Create the new init list. 1980 std::vector<Constant*> CAList; 1981 for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { 1982 if (Ctors[i]) { 1983 CSVals[1] = Ctors[i]; 1984 } else { 1985 Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), 1986 false); 1987 PointerType *PFTy = PointerType::getUnqual(FTy); 1988 CSVals[1] = Constant::getNullValue(PFTy); 1989 CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 1990 0x7fffffff); 1991 } 1992 CAList.push_back(ConstantStruct::get(StructTy, CSVals)); 1993 } 1994 1995 // Create the array initializer. 1996 Constant *CA = ConstantArray::get(ArrayType::get(StructTy, 1997 CAList.size()), CAList); 1998 1999 // If we didn't change the number of elements, don't create a new GV. 2000 if (CA->getType() == GCL->getInitializer()->getType()) { 2001 GCL->setInitializer(CA); 2002 return GCL; 2003 } 2004 2005 // Create the new global and insert it next to the existing list. 2006 GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), 2007 GCL->getLinkage(), CA, "", 2008 GCL->isThreadLocal()); 2009 GCL->getParent()->getGlobalList().insert(GCL, NGV); 2010 NGV->takeName(GCL); 2011 2012 // Nuke the old list, replacing any uses with the new one. 2013 if (!GCL->use_empty()) { 2014 Constant *V = NGV; 2015 if (V->getType() != GCL->getType()) 2016 V = ConstantExpr::getBitCast(V, GCL->getType()); 2017 GCL->replaceAllUsesWith(V); 2018 } 2019 GCL->eraseFromParent(); 2020 2021 if (Ctors.size()) 2022 return NGV; 2023 else 2024 return 0; 2025} 2026 2027 2028static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, Value *V) { 2029 if (Constant *CV = dyn_cast<Constant>(V)) return CV; 2030 Constant *R = ComputedValues[V]; 2031 assert(R && "Reference to an uncomputed value!"); 2032 return R; 2033} 2034 2035static inline bool 2036isSimpleEnoughValueToCommit(Constant *C, 2037 SmallPtrSet<Constant*, 8> &SimpleConstants, 2038 const TargetData *TD); 2039 2040 2041/// isSimpleEnoughValueToCommit - Return true if the specified constant can be 2042/// handled by the code generator. We don't want to generate something like: 2043/// void *X = &X/42; 2044/// because the code generator doesn't have a relocation that can handle that. 2045/// 2046/// This function should be called if C was not found (but just got inserted) 2047/// in SimpleConstants to avoid having to rescan the same constants all the 2048/// time. 
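/// Informally (globals hypothetical): plain integers, @G itself, bitcasts of
/// @G, and GEPs of @G with all-constant indices are committable, whereas
/// something like
///   udiv (i64 ptrtoint (i32** @X to i64), i64 42)
/// (the "&X/42" case above) is not.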
2049static bool isSimpleEnoughValueToCommitHelper(Constant *C, 2050 SmallPtrSet<Constant*, 8> &SimpleConstants, 2051 const TargetData *TD) { 2052 // Simple integer, undef, constant aggregate zero, global addresses, etc are 2053 // all supported. 2054 if (C->getNumOperands() == 0 || isa<BlockAddress>(C) || 2055 isa<GlobalValue>(C)) 2056 return true; 2057 2058 // Aggregate values are safe if all their elements are. 2059 if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) || 2060 isa<ConstantVector>(C)) { 2061 for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { 2062 Constant *Op = cast<Constant>(C->getOperand(i)); 2063 if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD)) 2064 return false; 2065 } 2066 return true; 2067 } 2068 2069 // We don't know exactly what relocations are allowed in constant expressions, 2070 // so we allow &global+constantoffset, which is safe and uniformly supported 2071 // across targets. 2072 ConstantExpr *CE = cast<ConstantExpr>(C); 2073 switch (CE->getOpcode()) { 2074 case Instruction::BitCast: 2075 // Bitcast is fine if the casted value is fine. 2076 return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); 2077 2078 case Instruction::IntToPtr: 2079 case Instruction::PtrToInt: 2080 // int <=> ptr is fine if the int type is the same size as the 2081 // pointer type. 2082 if (!TD || TD->getTypeSizeInBits(CE->getType()) != 2083 TD->getTypeSizeInBits(CE->getOperand(0)->getType())) 2084 return false; 2085 return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); 2086 2087 // GEP is fine if it is simple + constant offset. 2088 case Instruction::GetElementPtr: 2089 for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) 2090 if (!isa<ConstantInt>(CE->getOperand(i))) 2091 return false; 2092 return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); 2093 2094 case Instruction::Add: 2095 // We allow simple+cst. 2096 if (!isa<ConstantInt>(CE->getOperand(1))) 2097 return false; 2098 return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); 2099 } 2100 return false; 2101} 2102 2103static inline bool 2104isSimpleEnoughValueToCommit(Constant *C, 2105 SmallPtrSet<Constant*, 8> &SimpleConstants, 2106 const TargetData *TD) { 2107 // If we already checked this constant, we win. 2108 if (!SimpleConstants.insert(C)) return true; 2109 // Check the constant. 2110 return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD); 2111} 2112 2113 2114/// isSimpleEnoughPointerToCommit - Return true if this constant is simple 2115/// enough for us to understand. In particular, if it is a cast to anything 2116/// other than from one pointer type to another pointer type, we punt. 2117/// We basically just support direct accesses to globals and GEP's of 2118/// globals. This should be kept up to date with CommitValueTo. 2119static bool isSimpleEnoughPointerToCommit(Constant *C) { 2120 // Conservatively, avoid aggregate types. This is because we don't 2121 // want to worry about them partially overlapping other stores. 2122 if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) 2123 return false; 2124 2125 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) 2126 // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or 2127 // external globals. 2128 return GV->hasUniqueInitializer(); 2129 2130 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 2131 // Handle a constantexpr gep. 
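    // For example (sketch, hypothetical global), a pointer of the form
    //   getelementptr inbounds ([4 x i32]* @A, i32 0, i32 2)
    // is acceptable: the base is a global with a unique initializer, the
    // first index is zero, and the remaining indices are in bounds.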
2132 if (CE->getOpcode() == Instruction::GetElementPtr && 2133 isa<GlobalVariable>(CE->getOperand(0)) && 2134 cast<GEPOperator>(CE)->isInBounds()) { 2135 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2136 // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or 2137 // external globals. 2138 if (!GV->hasUniqueInitializer()) 2139 return false; 2140 2141 // The first index must be zero. 2142 ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin())); 2143 if (!CI || !CI->isZero()) return false; 2144 2145 // The remaining indices must be compile-time known integers within the 2146 // notional bounds of the corresponding static array types. 2147 if (!CE->isGEPWithNoNotionalOverIndexing()) 2148 return false; 2149 2150 return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); 2151 2152 // A constantexpr bitcast from a pointer to another pointer is a no-op, 2153 // and we know how to evaluate it by moving the bitcast from the pointer 2154 // operand to the value operand. 2155 } else if (CE->getOpcode() == Instruction::BitCast && 2156 isa<GlobalVariable>(CE->getOperand(0))) { 2157 // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or 2158 // external globals. 2159 return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer(); 2160 } 2161 } 2162 2163 return false; 2164} 2165 2166/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global 2167/// initializer. This returns 'Init' modified to reflect 'Val' stored into it. 2168/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. 2169static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, 2170 ConstantExpr *Addr, unsigned OpNo) { 2171 // Base case of the recursion. 2172 if (OpNo == Addr->getNumOperands()) { 2173 assert(Val->getType() == Init->getType() && "Type mismatch!"); 2174 return Val; 2175 } 2176 2177 SmallVector<Constant*, 32> Elts; 2178 if (StructType *STy = dyn_cast<StructType>(Init->getType())) { 2179 // Break up the constant into its elements. 2180 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) 2181 Elts.push_back(Init->getAggregateElement(i)); 2182 2183 // Replace the element that we are supposed to. 2184 ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); 2185 unsigned Idx = CU->getZExtValue(); 2186 assert(Idx < STy->getNumElements() && "Struct index out of range!"); 2187 Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); 2188 2189 // Return the modified struct. 2190 return ConstantStruct::get(STy, Elts); 2191 } 2192 2193 ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); 2194 SequentialType *InitTy = cast<SequentialType>(Init->getType()); 2195 2196 uint64_t NumElts; 2197 if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) 2198 NumElts = ATy->getNumElements(); 2199 else 2200 NumElts = InitTy->getVectorNumElements(); 2201 2202 // Break up the array into elements. 2203 for (uint64_t i = 0, e = NumElts; i != e; ++i) 2204 Elts.push_back(Init->getAggregateElement(i)); 2205 2206 assert(CI->getZExtValue() < NumElts); 2207 Elts[CI->getZExtValue()] = 2208 EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); 2209 2210 if (Init->getType()->isArrayTy()) 2211 return ConstantArray::get(cast<ArrayType>(InitTy), Elts); 2212 return ConstantVector::get(Elts); 2213} 2214 2215/// CommitValueTo - We have decided that Addr (which satisfies the predicate 2216/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. 
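/// For instance (hypothetical global), committing i32 7 through
///   getelementptr inbounds ({ i32, i32 }* @S, i32 0, i32 1)
/// rewrites @S's initializer from { i32 1, i32 2 } to { i32 1, i32 7 }.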
2217static void CommitValueTo(Constant *Val, Constant *Addr) { 2218 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 2219 assert(GV->hasInitializer()); 2220 GV->setInitializer(Val); 2221 return; 2222 } 2223 2224 ConstantExpr *CE = cast<ConstantExpr>(Addr); 2225 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2226 GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); 2227} 2228 2229/// ComputeLoadResult - Return the value that would be computed by a load from 2230/// P after the stores reflected by 'memory' have been performed. If we can't 2231/// decide, return null. 2232static Constant *ComputeLoadResult(Constant *P, 2233 const DenseMap<Constant*, Constant*> &Memory) { 2234 // If this memory location has been recently stored, use the stored value: it 2235 // is the most up-to-date. 2236 DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); 2237 if (I != Memory.end()) return I->second; 2238 2239 // Access it. 2240 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { 2241 if (GV->hasDefinitiveInitializer()) 2242 return GV->getInitializer(); 2243 return 0; 2244 } 2245 2246 // Handle a constantexpr getelementptr. 2247 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) 2248 if (CE->getOpcode() == Instruction::GetElementPtr && 2249 isa<GlobalVariable>(CE->getOperand(0))) { 2250 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2251 if (GV->hasDefinitiveInitializer()) 2252 return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); 2253 } 2254 2255 return 0; // don't know how to evaluate. 2256} 2257 2258/// EvaluateFunction - Evaluate a call to function F, returning true if 2259/// successful, false if we can't evaluate it. ActualArgs contains the formal 2260/// arguments for the function. 2261static bool EvaluateFunction(Function *F, Constant *&RetVal, 2262 const SmallVectorImpl<Constant*> &ActualArgs, 2263 std::vector<Function*> &CallStack, 2264 DenseMap<Constant*, Constant*> &MutatedMemory, 2265 std::vector<GlobalVariable*> &AllocaTmps, 2266 SmallPtrSet<Constant*, 8> &SimpleConstants, 2267 const TargetData *TD, 2268 const TargetLibraryInfo *TLI) { 2269 // Check to see if this function is already executing (recursion). If so, 2270 // bail out. TODO: we might want to accept limited recursion. 2271 if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) 2272 return false; 2273 2274 CallStack.push_back(F); 2275 2276 /// Values - As we compute SSA register values, we store their contents here. 2277 DenseMap<Value*, Constant*> Values; 2278 2279 // Initialize arguments to the incoming values specified. 2280 unsigned ArgNo = 0; 2281 for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; 2282 ++AI, ++ArgNo) 2283 Values[AI] = ActualArgs[ArgNo]; 2284 2285 /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such, 2286 /// we can only evaluate any one basic block at most once. This set keeps 2287 /// track of what we have executed so we can detect recursive cases etc. 2288 SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; 2289 2290 // CurInst - The current instruction we're evaluating. 2291 BasicBlock::iterator CurInst = F->begin()->begin(); 2292 2293 // This is the main evaluation loop. 2294 while (1) { 2295 Constant *InstResult = 0; 2296 2297 if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { 2298 if (!SI->isSimple()) return false; // no volatile/atomic accesses. 
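      // (Sketch of how a store is "executed" here: we record Ptr -> Val in
      // MutatedMemory below, and later loads consult that map first via
      // ComputeLoadResult; nothing is written back to the IR until the whole
      // ctor evaluates successfully.)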
2299      Constant *Ptr = getVal(Values, SI->getOperand(1));
2300      if (!isSimpleEnoughPointerToCommit(Ptr))
2301        // If this is too complex for us to commit, reject it.
2302        return false;
2303
2304      Constant *Val = getVal(Values, SI->getOperand(0));
2305
2306      // If this might be too difficult for the backend to handle (e.g. the addr
2307      // of one global variable divided by another) then we can't commit it.
2308      if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD))
2309        return false;
2310
2311      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
2312        if (CE->getOpcode() == Instruction::BitCast) {
2313          // If we're evaluating a store through a bitcast, then we need
2314          // to pull the bitcast off the pointer type and push it onto the
2315          // stored value.
2316          Ptr = CE->getOperand(0);
2317
2318          Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
2319
2320          // In order to push the bitcast onto the stored value, a bitcast
2321          // from NewTy to Val's type must be legal.  If it's not, we can try
2322          // introspecting NewTy to find a legal conversion.
2323          while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
2324            // If NewTy is a struct, we can convert the pointer to the struct
2325            // into a pointer to its first member.
2326            // FIXME: This could be extended to support arrays as well.
2327            if (StructType *STy = dyn_cast<StructType>(NewTy)) {
2328              NewTy = STy->getTypeAtIndex(0U);
2329
2330              IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
2331              Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
2332              Constant * const IdxList[] = {IdxZero, IdxZero};
2333
2334              Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
2335
2336            // If we can't improve the situation by introspecting NewTy,
2337            // we have to give up.
2338            } else {
2339              return false;
2340            }
2341          }
2342
2343          // If we found compatible types, go ahead and push the bitcast
2344          // onto the stored value.
2345          Val = ConstantExpr::getBitCast(Val, NewTy);
2346        }
2347
2348      MutatedMemory[Ptr] = Val;
2349    } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
2350      InstResult = ConstantExpr::get(BO->getOpcode(),
2351                                     getVal(Values, BO->getOperand(0)),
2352                                     getVal(Values, BO->getOperand(1)));
2353    } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
2354      InstResult = ConstantExpr::getCompare(CI->getPredicate(),
2355                                            getVal(Values, CI->getOperand(0)),
2356                                            getVal(Values, CI->getOperand(1)));
2357    } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
2358      InstResult = ConstantExpr::getCast(CI->getOpcode(),
2359                                         getVal(Values, CI->getOperand(0)),
2360                                         CI->getType());
2361    } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
2362      InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
2363                                           getVal(Values, SI->getOperand(1)),
2364                                           getVal(Values, SI->getOperand(2)));
2365    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
2366      Constant *P = getVal(Values, GEP->getOperand(0));
2367      SmallVector<Constant*, 8> GEPOps;
2368      for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
2369           i != e; ++i)
2370        GEPOps.push_back(getVal(Values, *i));
2371      InstResult =
2372        ConstantExpr::getGetElementPtr(P, GEPOps,
2373                                       cast<GEPOperator>(GEP)->isInBounds());
2374    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
2375      if (!LI->isSimple()) return false;  // no volatile/atomic accesses.
2376      InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
2377                                     MutatedMemory);
2378      if (InstResult == 0) return false; // Could not evaluate load.
2379 } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { 2380 if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. 2381 Type *Ty = AI->getType()->getElementType(); 2382 AllocaTmps.push_back(new GlobalVariable(Ty, false, 2383 GlobalValue::InternalLinkage, 2384 UndefValue::get(Ty), 2385 AI->getName())); 2386 InstResult = AllocaTmps.back(); 2387 } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) { 2388 2389 // Debug info can safely be ignored here. 2390 if (isa<DbgInfoIntrinsic>(CI)) { 2391 ++CurInst; 2392 continue; 2393 } 2394 2395 // Cannot handle inline asm. 2396 if (isa<InlineAsm>(CI->getCalledValue())) return false; 2397 2398 if (MemSetInst *MSI = dyn_cast<MemSetInst>(CI)) { 2399 if (MSI->isVolatile()) return false; 2400 Constant *Ptr = getVal(Values, MSI->getDest()); 2401 Constant *Val = getVal(Values, MSI->getValue()); 2402 Constant *DestVal = ComputeLoadResult(getVal(Values, Ptr), 2403 MutatedMemory); 2404 if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { 2405 // This memset is a no-op. 2406 ++CurInst; 2407 continue; 2408 } 2409 return false; 2410 } 2411 2412 // Resolve function pointers. 2413 Function *Callee = dyn_cast<Function>(getVal(Values, 2414 CI->getCalledValue())); 2415 if (!Callee) return false; // Cannot resolve. 2416 2417 SmallVector<Constant*, 8> Formals; 2418 CallSite CS(CI); 2419 for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); 2420 i != e; ++i) 2421 Formals.push_back(getVal(Values, *i)); 2422 2423 if (Callee->isDeclaration()) { 2424 // If this is a function we can constant fold, do it. 2425 if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { 2426 InstResult = C; 2427 } else { 2428 return false; 2429 } 2430 } else { 2431 if (Callee->getFunctionType()->isVarArg()) 2432 return false; 2433 2434 Constant *RetVal; 2435 // Execute the call, if successful, use the return value. 2436 if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, 2437 MutatedMemory, AllocaTmps, SimpleConstants, TD, 2438 TLI)) 2439 return false; 2440 InstResult = RetVal; 2441 } 2442 } else if (isa<TerminatorInst>(CurInst)) { 2443 BasicBlock *NewBB = 0; 2444 if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { 2445 if (BI->isUnconditional()) { 2446 NewBB = BI->getSuccessor(0); 2447 } else { 2448 ConstantInt *Cond = 2449 dyn_cast<ConstantInt>(getVal(Values, BI->getCondition())); 2450 if (!Cond) return false; // Cannot determine. 2451 2452 NewBB = BI->getSuccessor(!Cond->getZExtValue()); 2453 } 2454 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { 2455 ConstantInt *Val = 2456 dyn_cast<ConstantInt>(getVal(Values, SI->getCondition())); 2457 if (!Val) return false; // Cannot determine. 2458 unsigned ValTISucc = SI->resolveSuccessorIndex(SI->findCaseValue(Val)); 2459 NewBB = SI->getSuccessor(ValTISucc); 2460 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { 2461 Value *Val = getVal(Values, IBI->getAddress())->stripPointerCasts(); 2462 if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) 2463 NewBB = BA->getBasicBlock(); 2464 else 2465 return false; // Cannot determine. 2466 } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) { 2467 if (RI->getNumOperands()) 2468 RetVal = getVal(Values, RI->getOperand(0)); 2469 2470 CallStack.pop_back(); // return from fn. 2471 return true; // We succeeded at evaluating this ctor! 2472 } else { 2473 // invoke, unwind, resume, unreachable. 2474 return false; // Cannot handle this terminator. 2475 } 2476 2477 // Okay, we succeeded in evaluating this control flow. 
See if we have 2478 // executed the new block before. If so, we have a looping function, 2479 // which we cannot evaluate in reasonable time. 2480 if (!ExecutedBlocks.insert(NewBB)) 2481 return false; // looped! 2482 2483 // Okay, we have never been in this block before. Check to see if there 2484 // are any PHI nodes. If so, evaluate them with information about where 2485 // we came from. 2486 BasicBlock *OldBB = CurInst->getParent(); 2487 CurInst = NewBB->begin(); 2488 PHINode *PN; 2489 for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) 2490 Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB)); 2491 2492 // Do NOT increment CurInst. We know that the terminator had no value. 2493 continue; 2494 } else { 2495 // Did not know how to evaluate this! 2496 return false; 2497 } 2498 2499 if (!CurInst->use_empty()) { 2500 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) 2501 InstResult = ConstantFoldConstantExpression(CE, TD, TLI); 2502 2503 Values[CurInst] = InstResult; 2504 } 2505 2506 // Advance program counter. 2507 ++CurInst; 2508 } 2509} 2510 2511/// EvaluateStaticConstructor - Evaluate static constructors in the function, if 2512/// we can. Return true if we can, false otherwise. 2513static bool EvaluateStaticConstructor(Function *F, const TargetData *TD, 2514 const TargetLibraryInfo *TLI) { 2515 /// MutatedMemory - For each store we execute, we update this map. Loads 2516 /// check this to get the most up-to-date value. If evaluation is successful, 2517 /// this state is committed to the process. 2518 DenseMap<Constant*, Constant*> MutatedMemory; 2519 2520 /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable 2521 /// to represent its body. This vector is needed so we can delete the 2522 /// temporary globals when we are done. 2523 std::vector<GlobalVariable*> AllocaTmps; 2524 2525 /// CallStack - This is used to detect recursion. In pathological situations 2526 /// we could hit exponential behavior, but at least there is nothing 2527 /// unbounded. 2528 std::vector<Function*> CallStack; 2529 2530 /// SimpleConstants - These are constants we have checked and know to be 2531 /// simple enough to live in a static initializer of a global. 2532 SmallPtrSet<Constant*, 8> SimpleConstants; 2533 2534 // Call the function. 2535 Constant *RetValDummy; 2536 bool EvalSuccess = EvaluateFunction(F, RetValDummy, 2537 SmallVector<Constant*, 0>(), CallStack, 2538 MutatedMemory, AllocaTmps, 2539 SimpleConstants, TD, TLI); 2540 2541 if (EvalSuccess) { 2542 // We succeeded at evaluation: commit the result. 2543 DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" 2544 << F->getName() << "' to " << MutatedMemory.size() 2545 << " stores.\n"); 2546 for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(), 2547 E = MutatedMemory.end(); I != E; ++I) 2548 CommitValueTo(I->second, I->first); 2549 } 2550 2551 // At this point, we are done interpreting. If we created any 'alloca' 2552 // temporaries, release them now. 2553 while (!AllocaTmps.empty()) { 2554 GlobalVariable *Tmp = AllocaTmps.back(); 2555 AllocaTmps.pop_back(); 2556 2557 // If there are still users of the alloca, the program is doing something 2558 // silly, e.g. storing the address of the alloca somewhere and using it 2559 // later. Since this is undefined, we'll just make it be null. 
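    // (Hypothetical example: a ctor that stashed the alloca's address into
    // global state we committed would leave such a user behind; nulling the
    // remaining uses keeps the IR well formed before the temporary is
    // deleted.)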
2560    if (!Tmp->use_empty())
2561      Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
2562    delete Tmp;
2563  }
2564
2565  return EvalSuccess;
2566}
2567
2568/// OptimizeGlobalCtorsList - Simplify and evaluate global ctors if possible.
2569/// Return true if anything changed.
2570bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
2571  std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
2572  bool MadeChange = false;
2573  if (Ctors.empty()) return false;
2574
2575  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
2576  const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
2577
2578  // Loop over global ctors, optimizing them when we can.
2579  for (unsigned i = 0; i != Ctors.size(); ++i) {
2580    Function *F = Ctors[i];
2581    // Found a null terminator in the middle of the list, prune off the rest of
2582    // the list.
2583    if (F == 0) {
2584      if (i != Ctors.size()-1) {
2585        Ctors.resize(i+1);
2586        MadeChange = true;
2587      }
2588      break;
2589    }
2590
2591    // We cannot simplify external ctor functions.
2592    if (F->empty()) continue;
2593
2594    // If we can evaluate the ctor at compile time, do so.
2595    if (EvaluateStaticConstructor(F, TD, TLI)) {
2596      Ctors.erase(Ctors.begin()+i);
2597      MadeChange = true;
2598      --i;
2599      ++NumCtorsEvaluated;
2600      continue;
2601    }
2602  }
2603
2604  if (!MadeChange) return false;
2605
2606  GCL = InstallGlobalCtors(GCL, Ctors);
2607  return true;
2608}
2609
2610bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
2611  bool Changed = false;
2612
2613  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
2614       I != E;) {
2615    Module::alias_iterator J = I++;
2616    // Aliases without names cannot be referenced outside this module.
2617    if (!J->hasName() && !J->isDeclaration())
2618      J->setLinkage(GlobalValue::InternalLinkage);
2619    // If the aliasee may change at link time, nothing can be done - bail out.
2620    if (J->mayBeOverridden())
2621      continue;
2622
2623    Constant *Aliasee = J->getAliasee();
2624    GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
2625    Target->removeDeadConstantUsers();
2626    bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
2627
2628    // Make all users of the alias use the aliasee instead.
2629    if (!J->use_empty()) {
2630      J->replaceAllUsesWith(Aliasee);
2631      ++NumAliasesResolved;
2632      Changed = true;
2633    }
2634
2635    // If the alias is externally visible, we may still be able to simplify it.
2636    if (!J->hasLocalLinkage()) {
2637      // If the aliasee has internal linkage, give it the name and linkage
2638      // of the alias, and delete the alias.  This turns:
2639      //   define internal ... @f(...)
2640      //   @a = alias ... @f
2641      // into:
2642      //   define ... @a(...)
2643      if (!Target->hasLocalLinkage())
2644        continue;
2645
2646      // Do not perform the transform if multiple aliases potentially target the
2647      // aliasee.  This check also ensures that it is safe to replace the section
2648      // and other attributes of the aliasee with those of the alias.
2649      if (!hasOneUse)
2650        continue;
2651
2652      // Give the aliasee the name, linkage and other attributes of the alias.
2653      Target->takeName(J);
2654      Target->setLinkage(J->getLinkage());
2655      Target->GlobalValue::copyAttributesFrom(J);
2656    }
2657
2658    // Delete the alias.
2659 M.getAliasList().erase(J); 2660 ++NumAliasesRemoved; 2661 Changed = true; 2662 } 2663 2664 return Changed; 2665} 2666 2667static Function *FindCXAAtExit(Module &M) { 2668 Function *Fn = M.getFunction("__cxa_atexit"); 2669 2670 if (!Fn) 2671 return 0; 2672 2673 FunctionType *FTy = Fn->getFunctionType(); 2674 2675 // Checking that the function has the right return type, the right number of 2676 // parameters and that they all have pointer types should be enough. 2677 if (!FTy->getReturnType()->isIntegerTy() || 2678 FTy->getNumParams() != 3 || 2679 !FTy->getParamType(0)->isPointerTy() || 2680 !FTy->getParamType(1)->isPointerTy() || 2681 !FTy->getParamType(2)->isPointerTy()) 2682 return 0; 2683 2684 return Fn; 2685} 2686 2687/// cxxDtorIsEmpty - Returns whether the given function is an empty C++ 2688/// destructor and can therefore be eliminated. 2689/// Note that we assume that other optimization passes have already simplified 2690/// the code so we only look for a function with a single basic block, where 2691/// the only allowed instructions are 'ret' or 'call' to empty C++ dtor. 2692static bool cxxDtorIsEmpty(const Function &Fn, 2693 SmallPtrSet<const Function *, 8> &CalledFunctions) { 2694 // FIXME: We could eliminate C++ destructors if they're readonly/readnone and 2695 // nounwind, but that doesn't seem worth doing. 2696 if (Fn.isDeclaration()) 2697 return false; 2698 2699 if (++Fn.begin() != Fn.end()) 2700 return false; 2701 2702 const BasicBlock &EntryBlock = Fn.getEntryBlock(); 2703 for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end(); 2704 I != E; ++I) { 2705 if (const CallInst *CI = dyn_cast<CallInst>(I)) { 2706 // Ignore debug intrinsics. 2707 if (isa<DbgInfoIntrinsic>(CI)) 2708 continue; 2709 2710 const Function *CalledFn = CI->getCalledFunction(); 2711 2712 if (!CalledFn) 2713 return false; 2714 2715 SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions); 2716 2717 // Don't treat recursive functions as empty. 2718 if (!NewCalledFunctions.insert(CalledFn)) 2719 return false; 2720 2721 if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions)) 2722 return false; 2723 } else if (isa<ReturnInst>(*I)) 2724 return true; 2725 else 2726 return false; 2727 } 2728 2729 return false; 2730} 2731 2732bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { 2733 /// Itanium C++ ABI p3.3.5: 2734 /// 2735 /// After constructing a global (or local static) object, that will require 2736 /// destruction on exit, a termination function is registered as follows: 2737 /// 2738 /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); 2739 /// 2740 /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the 2741 /// call f(p) when DSO d is unloaded, before all such termination calls 2742 /// registered before this one. It returns zero if registration is 2743 /// successful, nonzero on failure. 2744 2745 // This pass will look for calls to __cxa_atexit where the function is trivial 2746 // and remove them. 2747 bool Changed = false; 2748 2749 for (Function::use_iterator I = CXAAtExitFn->use_begin(), 2750 E = CXAAtExitFn->use_end(); I != E;) { 2751 // We're only interested in calls. Theoretically, we could handle invoke 2752 // instructions as well, but neither llvm-gcc nor clang generate invokes 2753 // to __cxa_atexit. 
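    // A removable registration looks roughly like this (names hypothetical):
    //   call i32 @__cxa_atexit(void (i8*)* @dtor, i8* @obj, i8* @handle)
    // where @dtor's body reduces to a single 'ret void'.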
2754    CallInst *CI = dyn_cast<CallInst>(*I++);
2755    if (!CI)
2756      continue;
2757
2758    Function *DtorFn =
2759      dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
2760    if (!DtorFn)
2761      continue;
2762
2763    SmallPtrSet<const Function *, 8> CalledFunctions;
2764    if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
2765      continue;
2766
2767    // Just remove the call.
2768    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
2769    CI->eraseFromParent();
2770
2771    ++NumCXXDtorsRemoved;
2772
2773    Changed = true;
2774  }
2775
2776  return Changed;
2777}
2778
2779bool GlobalOpt::runOnModule(Module &M) {
2780  bool Changed = false;
2781
2782  // Try to find the llvm.global_ctors list.
2783  GlobalVariable *GlobalCtors = FindGlobalCtors(M);
2784
2785  Function *CXAAtExitFn = FindCXAAtExit(M);
2786
2787  bool LocalChange = true;
2788  while (LocalChange) {
2789    LocalChange = false;
2790
2791    // Delete functions that are trivially dead; also ccc -> fastcc.
2792    LocalChange |= OptimizeFunctions(M);
2793
2794    // Optimize global_ctors list.
2795    if (GlobalCtors)
2796      LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
2797
2798    // Optimize non-address-taken globals.
2799    LocalChange |= OptimizeGlobalVars(M);
2800
2801    // Resolve aliases, when possible.
2802    LocalChange |= OptimizeGlobalAliases(M);
2803
2804    // Try to remove trivial global destructors.
2805    if (CXAAtExitFn)
2806      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
2807
2808    Changed |= LocalChange;
2809  }
2810
2811  // TODO: Move all global ctors functions to the end of the module for code
2812  // layout.
2813
2814  return Changed;
2815}
2816