GlobalOpt.cpp revision 3302c000f59d2b6f9e4e4841e3b0bc953fb12005
1//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass transforms simple global variables that never have their address 11// taken. If obviously true, it marks read/write globals as constant, deletes 12// variables only stored to, etc. 13// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "globalopt" 17#include "llvm/Transforms/IPO.h" 18#include "llvm/CallingConv.h" 19#include "llvm/Constants.h" 20#include "llvm/DerivedTypes.h" 21#include "llvm/Instructions.h" 22#include "llvm/IntrinsicInst.h" 23#include "llvm/Module.h" 24#include "llvm/Pass.h" 25#include "llvm/Analysis/ConstantFolding.h" 26#include "llvm/Analysis/MemoryBuiltins.h" 27#include "llvm/Target/TargetData.h" 28#include "llvm/Support/CallSite.h" 29#include "llvm/Support/Debug.h" 30#include "llvm/Support/ErrorHandling.h" 31#include "llvm/Support/GetElementPtrTypeIterator.h" 32#include "llvm/Support/MathExtras.h" 33#include "llvm/Support/raw_ostream.h" 34#include "llvm/ADT/DenseMap.h" 35#include "llvm/ADT/SmallPtrSet.h" 36#include "llvm/ADT/SmallVector.h" 37#include "llvm/ADT/Statistic.h" 38#include "llvm/ADT/STLExtras.h" 39#include <algorithm> 40using namespace llvm; 41 42STATISTIC(NumMarked , "Number of globals marked constant"); 43STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); 44STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); 45STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); 46STATISTIC(NumDeleted , "Number of globals deleted"); 47STATISTIC(NumFnDeleted , "Number of functions deleted"); 48STATISTIC(NumGlobUses , "Number of global uses devirtualized"); 49STATISTIC(NumLocalized , "Number of globals localized"); 50STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); 51STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); 52STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); 53STATISTIC(NumNestRemoved , "Number of nest attributes removed"); 54STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); 55STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); 56 57namespace { 58 struct GlobalOpt : public ModulePass { 59 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 60 } 61 static char ID; // Pass identification, replacement for typeid 62 GlobalOpt() : ModulePass(ID) {} 63 64 bool runOnModule(Module &M); 65 66 private: 67 GlobalVariable *FindGlobalCtors(Module &M); 68 bool OptimizeFunctions(Module &M); 69 bool OptimizeGlobalVars(Module &M); 70 bool OptimizeGlobalAliases(Module &M); 71 bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); 72 bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); 73 }; 74} 75 76char GlobalOpt::ID = 0; 77INITIALIZE_PASS(GlobalOpt, "globalopt", 78 "Global Variable Optimizer", false, false) 79 80ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } 81 82namespace { 83 84/// GlobalStatus - As we analyze each global, keep track of some information 85/// about it. If we find out that the address of the global is taken, none of 86/// this info will be accurate. 87struct GlobalStatus { 88 /// isLoaded - True if the global is ever loaded. If the global isn't ever 89 /// loaded it can be deleted. 90 bool isLoaded; 91 92 /// StoredType - Keep track of what stores to the global look like. 93 /// 94 enum StoredType { 95 /// NotStored - There is no store to this global. It can thus be marked 96 /// constant. 97 NotStored, 98 99 /// isInitializerStored - This global is stored to, but the only thing 100 /// stored is the constant it was initialized with. This is only tracked 101 /// for scalar globals. 102 isInitializerStored, 103 104 /// isStoredOnce - This global is stored to, but only its initializer and 105 /// one other value is ever stored to it. If this global isStoredOnce, we 106 /// track the value stored to it in StoredOnceValue below. This is only 107 /// tracked for scalar globals. 108 isStoredOnce, 109 110 /// isStored - This global is stored to by multiple values or something else 111 /// that we cannot track. 112 isStored 113 } StoredType; 114 115 /// StoredOnceValue - If only one value (besides the initializer constant) is 116 /// ever stored to this global, keep track of what value it is. 117 Value *StoredOnceValue; 118 119 /// AccessingFunction/HasMultipleAccessingFunctions - These start out 120 /// null/false. When the first accessing function is noticed, it is recorded. 121 /// When a second different accessing function is noticed, 122 /// HasMultipleAccessingFunctions is set to true. 123 const Function *AccessingFunction; 124 bool HasMultipleAccessingFunctions; 125 126 /// HasNonInstructionUser - Set to true if this global has a user that is not 127 /// an instruction (e.g. a constant expr or GV initializer). 128 bool HasNonInstructionUser; 129 130 /// HasPHIUser - Set to true if this global has a user that is a PHI node. 131 bool HasPHIUser; 132 133 GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), 134 AccessingFunction(0), HasMultipleAccessingFunctions(false), 135 HasNonInstructionUser(false), HasPHIUser(false) {} 136}; 137 138} 139 140// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used 141// by constants itself. Note that constants cannot be cyclic, so this test is 142// pretty easy to implement recursively. 143// 144static bool SafeToDestroyConstant(const Constant *C) { 145 if (isa<GlobalValue>(C)) return false; 146 147 for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; 148 ++UI) 149 if (const Constant *CU = dyn_cast<Constant>(*UI)) { 150 if (!SafeToDestroyConstant(CU)) return false; 151 } else 152 return false; 153 return true; 154} 155 156 157/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus 158/// structure. If the global has its address taken, return true to indicate we 159/// can't do anything with it. 160/// 161static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, 162 SmallPtrSet<const PHINode*, 16> &PHIUsers) { 163 for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; 164 ++UI) { 165 const User *U = *UI; 166 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { 167 GS.HasNonInstructionUser = true; 168 if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; 169 } else if (const Instruction *I = dyn_cast<Instruction>(U)) { 170 if (!GS.HasMultipleAccessingFunctions) { 171 const Function *F = I->getParent()->getParent(); 172 if (GS.AccessingFunction == 0) 173 GS.AccessingFunction = F; 174 else if (GS.AccessingFunction != F) 175 GS.HasMultipleAccessingFunctions = true; 176 } 177 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { 178 GS.isLoaded = true; 179 if (LI->isVolatile()) return true; // Don't hack on volatile loads. 180 } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { 181 // Don't allow a store OF the address, only stores TO the address. 182 if (SI->getOperand(0) == V) return true; 183 184 if (SI->isVolatile()) return true; // Don't hack on volatile stores. 185 186 // If this is a direct store to the global (i.e., the global is a scalar 187 // value, not an aggregate), keep more specific information about 188 // stores. 189 if (GS.StoredType != GlobalStatus::isStored) { 190 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>( 191 SI->getOperand(1))) { 192 Value *StoredVal = SI->getOperand(0); 193 if (StoredVal == GV->getInitializer()) { 194 if (GS.StoredType < GlobalStatus::isInitializerStored) 195 GS.StoredType = GlobalStatus::isInitializerStored; 196 } else if (isa<LoadInst>(StoredVal) && 197 cast<LoadInst>(StoredVal)->getOperand(0) == GV) { 198 if (GS.StoredType < GlobalStatus::isInitializerStored) 199 GS.StoredType = GlobalStatus::isInitializerStored; 200 } else if (GS.StoredType < GlobalStatus::isStoredOnce) { 201 GS.StoredType = GlobalStatus::isStoredOnce; 202 GS.StoredOnceValue = StoredVal; 203 } else if (GS.StoredType == GlobalStatus::isStoredOnce && 204 GS.StoredOnceValue == StoredVal) { 205 // noop. 206 } else { 207 GS.StoredType = GlobalStatus::isStored; 208 } 209 } else { 210 GS.StoredType = GlobalStatus::isStored; 211 } 212 } 213 } else if (isa<GetElementPtrInst>(I)) { 214 if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 215 } else if (isa<SelectInst>(I)) { 216 if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 217 } else if (const PHINode *PN = dyn_cast<PHINode>(I)) { 218 // PHI nodes we can check just like select or GEP instructions, but we 219 // have to be careful about infinite recursion. 220 if (PHIUsers.insert(PN)) // Not already visited. 221 if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 222 GS.HasPHIUser = true; 223 } else if (isa<CmpInst>(I)) { 224 // Nothing to analyse. 225 } else if (isa<MemTransferInst>(I)) { 226 const MemTransferInst *MTI = cast<MemTransferInst>(I); 227 if (MTI->getArgOperand(0) == V) 228 GS.StoredType = GlobalStatus::isStored; 229 if (MTI->getArgOperand(1) == V) 230 GS.isLoaded = true; 231 } else if (isa<MemSetInst>(I)) { 232 assert(cast<MemSetInst>(I)->getArgOperand(0) == V && 233 "Memset only takes one pointer!"); 234 GS.StoredType = GlobalStatus::isStored; 235 } else { 236 return true; // Any other non-load instruction might take address! 237 } 238 } else if (const Constant *C = dyn_cast<Constant>(U)) { 239 GS.HasNonInstructionUser = true; 240 // We might have a dead and dangling constant hanging off of here. 241 if (!SafeToDestroyConstant(C)) 242 return true; 243 } else { 244 GS.HasNonInstructionUser = true; 245 // Otherwise must be some other user. 246 return true; 247 } 248 } 249 250 return false; 251} 252 253static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { 254 ConstantInt *CI = dyn_cast<ConstantInt>(Idx); 255 if (!CI) return 0; 256 unsigned IdxV = CI->getZExtValue(); 257 258 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) { 259 if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV); 260 } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) { 261 if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV); 262 } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) { 263 if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); 264 } else if (isa<ConstantAggregateZero>(Agg)) { 265 if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { 266 if (IdxV < STy->getNumElements()) 267 return Constant::getNullValue(STy->getElementType(IdxV)); 268 } else if (const SequentialType *STy = 269 dyn_cast<SequentialType>(Agg->getType())) { 270 return Constant::getNullValue(STy->getElementType()); 271 } 272 } else if (isa<UndefValue>(Agg)) { 273 if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { 274 if (IdxV < STy->getNumElements()) 275 return UndefValue::get(STy->getElementType(IdxV)); 276 } else if (const SequentialType *STy = 277 dyn_cast<SequentialType>(Agg->getType())) { 278 return UndefValue::get(STy->getElementType()); 279 } 280 } 281 return 0; 282} 283 284 285/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all 286/// users of the global, cleaning up the obvious ones. This is largely just a 287/// quick scan over the use list to clean up the easy and obvious cruft. This 288/// returns true if it made a change. 289static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { 290 bool Changed = false; 291 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { 292 User *U = *UI++; 293 294 if (LoadInst *LI = dyn_cast<LoadInst>(U)) { 295 if (Init) { 296 // Replace the load with the initializer. 297 LI->replaceAllUsesWith(Init); 298 LI->eraseFromParent(); 299 Changed = true; 300 } 301 } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 302 // Store must be unreachable or storing Init into the global. 303 SI->eraseFromParent(); 304 Changed = true; 305 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { 306 if (CE->getOpcode() == Instruction::GetElementPtr) { 307 Constant *SubInit = 0; 308 if (Init) 309 SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); 310 Changed |= CleanupConstantGlobalUsers(CE, SubInit); 311 } else if (CE->getOpcode() == Instruction::BitCast && 312 CE->getType()->isPointerTy()) { 313 // Pointer cast, delete any stores and memsets to the global. 314 Changed |= CleanupConstantGlobalUsers(CE, 0); 315 } 316 317 if (CE->use_empty()) { 318 CE->destroyConstant(); 319 Changed = true; 320 } 321 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { 322 // Do not transform "gepinst (gep constexpr (GV))" here, because forming 323 // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold 324 // and will invalidate our notion of what Init is. 325 Constant *SubInit = 0; 326 if (!isa<ConstantExpr>(GEP->getOperand(0))) { 327 ConstantExpr *CE = 328 dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); 329 if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) 330 SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); 331 } 332 Changed |= CleanupConstantGlobalUsers(GEP, SubInit); 333 334 if (GEP->use_empty()) { 335 GEP->eraseFromParent(); 336 Changed = true; 337 } 338 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv 339 if (MI->getRawDest() == V) { 340 MI->eraseFromParent(); 341 Changed = true; 342 } 343 344 } else if (Constant *C = dyn_cast<Constant>(U)) { 345 // If we have a chain of dead constantexprs or other things dangling from 346 // us, and if they are all dead, nuke them without remorse. 347 if (SafeToDestroyConstant(C)) { 348 C->destroyConstant(); 349 // This could have invalidated UI, start over from scratch. 350 CleanupConstantGlobalUsers(V, Init); 351 return true; 352 } 353 } 354 } 355 return Changed; 356} 357 358/// isSafeSROAElementUse - Return true if the specified instruction is a safe 359/// user of a derived expression from a global that we want to SROA. 360static bool isSafeSROAElementUse(Value *V) { 361 // We might have a dead and dangling constant hanging off of here. 362 if (Constant *C = dyn_cast<Constant>(V)) 363 return SafeToDestroyConstant(C); 364 365 Instruction *I = dyn_cast<Instruction>(V); 366 if (!I) return false; 367 368 // Loads are ok. 369 if (isa<LoadInst>(I)) return true; 370 371 // Stores *to* the pointer are ok. 372 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 373 return SI->getOperand(0) != V; 374 375 // Otherwise, it must be a GEP. 376 GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); 377 if (GEPI == 0) return false; 378 379 if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || 380 !cast<Constant>(GEPI->getOperand(1))->isNullValue()) 381 return false; 382 383 for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); 384 I != E; ++I) 385 if (!isSafeSROAElementUse(*I)) 386 return false; 387 return true; 388} 389 390 391/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value. 392/// Look at it and its uses and decide whether it is safe to SROA this global. 393/// 394static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { 395 // The user of the global must be a GEP Inst or a ConstantExpr GEP. 396 if (!isa<GetElementPtrInst>(U) && 397 (!isa<ConstantExpr>(U) || 398 cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) 399 return false; 400 401 // Check to see if this ConstantExpr GEP is SRA'able. In particular, we 402 // don't like < 3 operand CE's, and we don't like non-constant integer 403 // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some 404 // value of C. 405 if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || 406 !cast<Constant>(U->getOperand(1))->isNullValue() || 407 !isa<ConstantInt>(U->getOperand(2))) 408 return false; 409 410 gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); 411 ++GEPI; // Skip over the pointer index. 412 413 // If this is a use of an array allocation, do a bit more checking for sanity. 414 if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { 415 uint64_t NumElements = AT->getNumElements(); 416 ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); 417 418 // Check to make sure that index falls within the array. If not, 419 // something funny is going on, so we won't do the optimization. 420 // 421 if (Idx->getZExtValue() >= NumElements) 422 return false; 423 424 // We cannot scalar repl this level of the array unless any array 425 // sub-indices are in-range constants. In particular, consider: 426 // A[0][i]. We cannot know that the user isn't doing invalid things like 427 // allowing i to index an out-of-range subscript that accesses A[1]. 428 // 429 // Scalar replacing *just* the outer index of the array is probably not 430 // going to be a win anyway, so just give up. 431 for (++GEPI; // Skip array index. 432 GEPI != E; 433 ++GEPI) { 434 uint64_t NumElements; 435 if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) 436 NumElements = SubArrayTy->getNumElements(); 437 else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) 438 NumElements = SubVectorTy->getNumElements(); 439 else { 440 assert((*GEPI)->isStructTy() && 441 "Indexed GEP type is not array, vector, or struct!"); 442 continue; 443 } 444 445 ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); 446 if (!IdxVal || IdxVal->getZExtValue() >= NumElements) 447 return false; 448 } 449 } 450 451 for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) 452 if (!isSafeSROAElementUse(*I)) 453 return false; 454 return true; 455} 456 457/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it 458/// is safe for us to perform this transformation. 459/// 460static bool GlobalUsersSafeToSRA(GlobalValue *GV) { 461 for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); 462 UI != E; ++UI) { 463 if (!IsUserOfGlobalSafeForSRA(*UI, GV)) 464 return false; 465 } 466 return true; 467} 468 469 470/// SRAGlobal - Perform scalar replacement of aggregates on the specified global 471/// variable. This opens the door for other optimizations by exposing the 472/// behavior of the program in a more fine-grained way. We have determined that 473/// this transformation is safe already. We return the first global variable we 474/// insert so that the caller can reprocess it. 475static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { 476 // Make sure this global only has simple uses that we can SRA. 477 if (!GlobalUsersSafeToSRA(GV)) 478 return 0; 479 480 assert(GV->hasLocalLinkage() && !GV->isConstant()); 481 Constant *Init = GV->getInitializer(); 482 const Type *Ty = Init->getType(); 483 484 std::vector<GlobalVariable*> NewGlobals; 485 Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); 486 487 // Get the alignment of the global, either explicit or target-specific. 488 unsigned StartAlignment = GV->getAlignment(); 489 if (StartAlignment == 0) 490 StartAlignment = TD.getABITypeAlignment(GV->getType()); 491 492 if (const StructType *STy = dyn_cast<StructType>(Ty)) { 493 NewGlobals.reserve(STy->getNumElements()); 494 const StructLayout &Layout = *TD.getStructLayout(STy); 495 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 496 Constant *In = getAggregateConstantElement(Init, 497 ConstantInt::get(Type::getInt32Ty(STy->getContext()), i)); 498 assert(In && "Couldn't get element of initializer?"); 499 GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, 500 GlobalVariable::InternalLinkage, 501 In, GV->getName()+"."+Twine(i), 502 GV->isThreadLocal(), 503 GV->getType()->getAddressSpace()); 504 Globals.insert(GV, NGV); 505 NewGlobals.push_back(NGV); 506 507 // Calculate the known alignment of the field. If the original aggregate 508 // had 256 byte alignment for example, something might depend on that: 509 // propagate info to each field. 510 uint64_t FieldOffset = Layout.getElementOffset(i); 511 unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); 512 if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) 513 NGV->setAlignment(NewAlign); 514 } 515 } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) { 516 unsigned NumElements = 0; 517 if (const ArrayType *ATy = dyn_cast<ArrayType>(STy)) 518 NumElements = ATy->getNumElements(); 519 else 520 NumElements = cast<VectorType>(STy)->getNumElements(); 521 522 if (NumElements > 16 && GV->hasNUsesOrMore(16)) 523 return 0; // It's not worth it. 524 NewGlobals.reserve(NumElements); 525 526 uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); 527 unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); 528 for (unsigned i = 0, e = NumElements; i != e; ++i) { 529 Constant *In = getAggregateConstantElement(Init, 530 ConstantInt::get(Type::getInt32Ty(Init->getContext()), i)); 531 assert(In && "Couldn't get element of initializer?"); 532 533 GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, 534 GlobalVariable::InternalLinkage, 535 In, GV->getName()+"."+Twine(i), 536 GV->isThreadLocal(), 537 GV->getType()->getAddressSpace()); 538 Globals.insert(GV, NGV); 539 NewGlobals.push_back(NGV); 540 541 // Calculate the known alignment of the field. If the original aggregate 542 // had 256 byte alignment for example, something might depend on that: 543 // propagate info to each field. 544 unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); 545 if (NewAlign > EltAlign) 546 NGV->setAlignment(NewAlign); 547 } 548 } 549 550 if (NewGlobals.empty()) 551 return 0; 552 553 DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); 554 555 Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); 556 557 // Loop over all of the uses of the global, replacing the constantexpr geps, 558 // with smaller constantexpr geps or direct references. 559 while (!GV->use_empty()) { 560 User *GEP = GV->use_back(); 561 assert(((isa<ConstantExpr>(GEP) && 562 cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| 563 isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); 564 565 // Ignore the 1th operand, which has to be zero or else the program is quite 566 // broken (undefined). Get the 2nd operand, which is the structure or array 567 // index. 568 unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); 569 if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. 570 571 Value *NewPtr = NewGlobals[Val]; 572 573 // Form a shorter GEP if needed. 574 if (GEP->getNumOperands() > 3) { 575 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) { 576 SmallVector<Constant*, 8> Idxs; 577 Idxs.push_back(NullInt); 578 for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) 579 Idxs.push_back(CE->getOperand(i)); 580 NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), 581 &Idxs[0], Idxs.size()); 582 } else { 583 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); 584 SmallVector<Value*, 8> Idxs; 585 Idxs.push_back(NullInt); 586 for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) 587 Idxs.push_back(GEPI->getOperand(i)); 588 NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), 589 GEPI->getName()+"."+Twine(Val),GEPI); 590 } 591 } 592 GEP->replaceAllUsesWith(NewPtr); 593 594 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) 595 GEPI->eraseFromParent(); 596 else 597 cast<ConstantExpr>(GEP)->destroyConstant(); 598 } 599 600 // Delete the old global, now that it is dead. 601 Globals.erase(GV); 602 ++NumSRA; 603 604 // Loop over the new globals array deleting any globals that are obviously 605 // dead. This can arise due to scalarization of a structure or an array that 606 // has elements that are dead. 607 unsigned FirstGlobal = 0; 608 for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i) 609 if (NewGlobals[i]->use_empty()) { 610 Globals.erase(NewGlobals[i]); 611 if (FirstGlobal == i) ++FirstGlobal; 612 } 613 614 return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0; 615} 616 617/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified 618/// value will trap if the value is dynamically null. PHIs keeps track of any 619/// phi nodes we've seen to avoid reprocessing them. 620static bool AllUsesOfValueWillTrapIfNull(const Value *V, 621 SmallPtrSet<const PHINode*, 8> &PHIs) { 622 for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; 623 ++UI) { 624 const User *U = *UI; 625 626 if (isa<LoadInst>(U)) { 627 // Will trap. 628 } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { 629 if (SI->getOperand(0) == V) { 630 //cerr << "NONTRAPPING USE: " << *U; 631 return false; // Storing the value. 632 } 633 } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { 634 if (CI->getCalledValue() != V) { 635 //cerr << "NONTRAPPING USE: " << *U; 636 return false; // Not calling the ptr 637 } 638 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { 639 if (II->getCalledValue() != V) { 640 //cerr << "NONTRAPPING USE: " << *U; 641 return false; // Not calling the ptr 642 } 643 } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) { 644 if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; 645 } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { 646 if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; 647 } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { 648 // If we've already seen this phi node, ignore it, it has already been 649 // checked. 650 if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) 651 return false; 652 } else if (isa<ICmpInst>(U) && 653 isa<ConstantPointerNull>(UI->getOperand(1))) { 654 // Ignore icmp X, null 655 } else { 656 //cerr << "NONTRAPPING USE: " << *U; 657 return false; 658 } 659 } 660 return true; 661} 662 663/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads 664/// from GV will trap if the loaded value is null. Note that this also permits 665/// comparisons of the loaded value against null, as a special case. 666static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { 667 for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); 668 UI != E; ++UI) { 669 const User *U = *UI; 670 671 if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { 672 SmallPtrSet<const PHINode*, 8> PHIs; 673 if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) 674 return false; 675 } else if (isa<StoreInst>(U)) { 676 // Ignore stores to the global. 677 } else { 678 // We don't know or understand this user, bail out. 679 //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; 680 return false; 681 } 682 } 683 return true; 684} 685 686static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { 687 bool Changed = false; 688 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { 689 Instruction *I = cast<Instruction>(*UI++); 690 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 691 LI->setOperand(0, NewV); 692 Changed = true; 693 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 694 if (SI->getOperand(1) == V) { 695 SI->setOperand(1, NewV); 696 Changed = true; 697 } 698 } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { 699 CallSite CS(I); 700 if (CS.getCalledValue() == V) { 701 // Calling through the pointer! Turn into a direct call, but be careful 702 // that the pointer is not also being passed as an argument. 703 CS.setCalledFunction(NewV); 704 Changed = true; 705 bool PassedAsArg = false; 706 for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) 707 if (CS.getArgument(i) == V) { 708 PassedAsArg = true; 709 CS.setArgument(i, NewV); 710 } 711 712 if (PassedAsArg) { 713 // Being passed as an argument also. Be careful to not invalidate UI! 714 UI = V->use_begin(); 715 } 716 } 717 } else if (CastInst *CI = dyn_cast<CastInst>(I)) { 718 Changed |= OptimizeAwayTrappingUsesOfValue(CI, 719 ConstantExpr::getCast(CI->getOpcode(), 720 NewV, CI->getType())); 721 if (CI->use_empty()) { 722 Changed = true; 723 CI->eraseFromParent(); 724 } 725 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { 726 // Should handle GEP here. 727 SmallVector<Constant*, 8> Idxs; 728 Idxs.reserve(GEPI->getNumOperands()-1); 729 for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end(); 730 i != e; ++i) 731 if (Constant *C = dyn_cast<Constant>(*i)) 732 Idxs.push_back(C); 733 else 734 break; 735 if (Idxs.size() == GEPI->getNumOperands()-1) 736 Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, 737 ConstantExpr::getGetElementPtr(NewV, &Idxs[0], 738 Idxs.size())); 739 if (GEPI->use_empty()) { 740 Changed = true; 741 GEPI->eraseFromParent(); 742 } 743 } 744 } 745 746 return Changed; 747} 748 749 750/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null 751/// value stored into it. If there are uses of the loaded value that would trap 752/// if the loaded value is dynamically null, then we know that they cannot be 753/// reachable with a null optimize away the load. 754static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { 755 bool Changed = false; 756 757 // Keep track of whether we are able to remove all the uses of the global 758 // other than the store that defines it. 759 bool AllNonStoreUsesGone = true; 760 761 // Replace all uses of loads with uses of uses of the stored value. 762 for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ 763 User *GlobalUser = *GUI++; 764 if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { 765 Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); 766 // If we were able to delete all uses of the loads 767 if (LI->use_empty()) { 768 LI->eraseFromParent(); 769 Changed = true; 770 } else { 771 AllNonStoreUsesGone = false; 772 } 773 } else if (isa<StoreInst>(GlobalUser)) { 774 // Ignore the store that stores "LV" to the global. 775 assert(GlobalUser->getOperand(1) == GV && 776 "Must be storing *to* the global"); 777 } else { 778 AllNonStoreUsesGone = false; 779 780 // If we get here we could have other crazy uses that are transitively 781 // loaded. 782 assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || 783 isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!"); 784 } 785 } 786 787 if (Changed) { 788 DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); 789 ++NumGlobUses; 790 } 791 792 // If we nuked all of the loads, then none of the stores are needed either, 793 // nor is the global. 794 if (AllNonStoreUsesGone) { 795 DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); 796 CleanupConstantGlobalUsers(GV, 0); 797 if (GV->use_empty()) { 798 GV->eraseFromParent(); 799 ++NumDeleted; 800 } 801 Changed = true; 802 } 803 return Changed; 804} 805 806/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the 807/// instructions that are foldable. 808static void ConstantPropUsersOf(Value *V) { 809 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) 810 if (Instruction *I = dyn_cast<Instruction>(*UI++)) 811 if (Constant *NewC = ConstantFoldInstruction(I)) { 812 I->replaceAllUsesWith(NewC); 813 814 // Advance UI to the next non-I use to avoid invalidating it! 815 // Instructions could multiply use V. 816 while (UI != E && *UI == I) 817 ++UI; 818 I->eraseFromParent(); 819 } 820} 821 822/// OptimizeGlobalAddressOfMalloc - This function takes the specified global 823/// variable, and transforms the program as if it always contained the result of 824/// the specified malloc. Because it is always the result of the specified 825/// malloc, there is no reason to actually DO the malloc. Instead, turn the 826/// malloc into a global, and any loads of GV as uses of the new global. 827static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, 828 CallInst *CI, 829 const Type *AllocTy, 830 ConstantInt *NElements, 831 TargetData* TD) { 832 DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); 833 834 const Type *GlobalType; 835 if (NElements->getZExtValue() == 1) 836 GlobalType = AllocTy; 837 else 838 // If we have an array allocation, the global variable is of an array. 839 GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); 840 841 // Create the new global variable. The contents of the malloc'd memory is 842 // undefined, so initialize with an undef value. 843 GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), 844 GlobalType, false, 845 GlobalValue::InternalLinkage, 846 UndefValue::get(GlobalType), 847 GV->getName()+".body", 848 GV, 849 GV->isThreadLocal()); 850 851 // If there are bitcast users of the malloc (which is typical, usually we have 852 // a malloc + bitcast) then replace them with uses of the new global. Update 853 // other users to use the global as well. 854 BitCastInst *TheBC = 0; 855 while (!CI->use_empty()) { 856 Instruction *User = cast<Instruction>(CI->use_back()); 857 if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { 858 if (BCI->getType() == NewGV->getType()) { 859 BCI->replaceAllUsesWith(NewGV); 860 BCI->eraseFromParent(); 861 } else { 862 BCI->setOperand(0, NewGV); 863 } 864 } else { 865 if (TheBC == 0) 866 TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); 867 User->replaceUsesOfWith(CI, TheBC); 868 } 869 } 870 871 Constant *RepValue = NewGV; 872 if (NewGV->getType() != GV->getType()->getElementType()) 873 RepValue = ConstantExpr::getBitCast(RepValue, 874 GV->getType()->getElementType()); 875 876 // If there is a comparison against null, we will insert a global bool to 877 // keep track of whether the global was initialized yet or not. 878 GlobalVariable *InitBool = 879 new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, 880 GlobalValue::InternalLinkage, 881 ConstantInt::getFalse(GV->getContext()), 882 GV->getName()+".init", GV->isThreadLocal()); 883 bool InitBoolUsed = false; 884 885 // Loop over all uses of GV, processing them in turn. 886 while (!GV->use_empty()) { 887 if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) { 888 // The global is initialized when the store to it occurs. 889 new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI); 890 SI->eraseFromParent(); 891 continue; 892 } 893 894 LoadInst *LI = cast<LoadInst>(GV->use_back()); 895 while (!LI->use_empty()) { 896 Use &LoadUse = LI->use_begin().getUse(); 897 if (!isa<ICmpInst>(LoadUse.getUser())) { 898 LoadUse = RepValue; 899 continue; 900 } 901 902 ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); 903 // Replace the cmp X, 0 with a use of the bool value. 904 Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); 905 InitBoolUsed = true; 906 switch (ICI->getPredicate()) { 907 default: llvm_unreachable("Unknown ICmp Predicate!"); 908 case ICmpInst::ICMP_ULT: 909 case ICmpInst::ICMP_SLT: // X < null -> always false 910 LV = ConstantInt::getFalse(GV->getContext()); 911 break; 912 case ICmpInst::ICMP_ULE: 913 case ICmpInst::ICMP_SLE: 914 case ICmpInst::ICMP_EQ: 915 LV = BinaryOperator::CreateNot(LV, "notinit", ICI); 916 break; 917 case ICmpInst::ICMP_NE: 918 case ICmpInst::ICMP_UGE: 919 case ICmpInst::ICMP_SGE: 920 case ICmpInst::ICMP_UGT: 921 case ICmpInst::ICMP_SGT: 922 break; // no change. 923 } 924 ICI->replaceAllUsesWith(LV); 925 ICI->eraseFromParent(); 926 } 927 LI->eraseFromParent(); 928 } 929 930 // If the initialization boolean was used, insert it, otherwise delete it. 931 if (!InitBoolUsed) { 932 while (!InitBool->use_empty()) // Delete initializations 933 cast<StoreInst>(InitBool->use_back())->eraseFromParent(); 934 delete InitBool; 935 } else 936 GV->getParent()->getGlobalList().insert(GV, InitBool); 937 938 // Now the GV is dead, nuke it and the malloc.. 939 GV->eraseFromParent(); 940 CI->eraseFromParent(); 941 942 // To further other optimizations, loop over all users of NewGV and try to 943 // constant prop them. This will promote GEP instructions with constant 944 // indices into GEP constant-exprs, which will allow global-opt to hack on it. 945 ConstantPropUsersOf(NewGV); 946 if (RepValue != NewGV) 947 ConstantPropUsersOf(RepValue); 948 949 return NewGV; 950} 951 952/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking 953/// to make sure that there are no complex uses of V. We permit simple things 954/// like dereferencing the pointer, but not storing through the address, unless 955/// it is to the specified global. 956static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, 957 const GlobalVariable *GV, 958 SmallPtrSet<const PHINode*, 8> &PHIs) { 959 for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); 960 UI != E; ++UI) { 961 const Instruction *Inst = cast<Instruction>(*UI); 962 963 if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { 964 continue; // Fine, ignore. 965 } 966 967 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 968 if (SI->getOperand(0) == V && SI->getOperand(1) != GV) 969 return false; // Storing the pointer itself... bad. 970 continue; // Otherwise, storing through it, or storing into GV... fine. 971 } 972 973 // Must index into the array and into the struct. 974 if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) { 975 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) 976 return false; 977 continue; 978 } 979 980 if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { 981 // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI 982 // cycles. 983 if (PHIs.insert(PN)) 984 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)) 985 return false; 986 continue; 987 } 988 989 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { 990 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) 991 return false; 992 continue; 993 } 994 995 return false; 996 } 997 return true; 998} 999 1000/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV 1001/// somewhere. Transform all uses of the allocation into loads from the 1002/// global and uses of the resultant pointer. Further, delete the store into 1003/// GV. This assumes that these value pass the 1004/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. 1005static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, 1006 GlobalVariable *GV) { 1007 while (!Alloc->use_empty()) { 1008 Instruction *U = cast<Instruction>(*Alloc->use_begin()); 1009 Instruction *InsertPt = U; 1010 if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 1011 // If this is the store of the allocation into the global, remove it. 1012 if (SI->getOperand(1) == GV) { 1013 SI->eraseFromParent(); 1014 continue; 1015 } 1016 } else if (PHINode *PN = dyn_cast<PHINode>(U)) { 1017 // Insert the load in the corresponding predecessor, not right before the 1018 // PHI. 1019 InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator(); 1020 } else if (isa<BitCastInst>(U)) { 1021 // Must be bitcast between the malloc and store to initialize the global. 1022 ReplaceUsesOfMallocWithGlobal(U, GV); 1023 U->eraseFromParent(); 1024 continue; 1025 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { 1026 // If this is a "GEP bitcast" and the user is a store to the global, then 1027 // just process it as a bitcast. 1028 if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse()) 1029 if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back())) 1030 if (SI->getOperand(1) == GV) { 1031 // Must be bitcast GEP between the malloc and store to initialize 1032 // the global. 1033 ReplaceUsesOfMallocWithGlobal(GEPI, GV); 1034 GEPI->eraseFromParent(); 1035 continue; 1036 } 1037 } 1038 1039 // Insert a load from the global, and use it instead of the malloc. 1040 Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); 1041 U->replaceUsesOfWith(Alloc, NL); 1042 } 1043} 1044 1045/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi 1046/// of a load) are simple enough to perform heap SRA on. This permits GEP's 1047/// that index through the array and struct field, icmps of null, and PHIs. 1048static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, 1049 SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs, 1050 SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) { 1051 // We permit two users of the load: setcc comparing against the null 1052 // pointer, and a getelementptr of a specific form. 1053 for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; 1054 ++UI) { 1055 const Instruction *User = cast<Instruction>(*UI); 1056 1057 // Comparison against null is ok. 1058 if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) { 1059 if (!isa<ConstantPointerNull>(ICI->getOperand(1))) 1060 return false; 1061 continue; 1062 } 1063 1064 // getelementptr is also ok, but only a simple form. 1065 if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { 1066 // Must index into the array and into the struct. 1067 if (GEPI->getNumOperands() < 3) 1068 return false; 1069 1070 // Otherwise the GEP is ok. 1071 continue; 1072 } 1073 1074 if (const PHINode *PN = dyn_cast<PHINode>(User)) { 1075 if (!LoadUsingPHIsPerLoad.insert(PN)) 1076 // This means some phi nodes are dependent on each other. 1077 // Avoid infinite looping! 1078 return false; 1079 if (!LoadUsingPHIs.insert(PN)) 1080 // If we have already analyzed this PHI, then it is safe. 1081 continue; 1082 1083 // Make sure all uses of the PHI are simple enough to transform. 1084 if (!LoadUsesSimpleEnoughForHeapSRA(PN, 1085 LoadUsingPHIs, LoadUsingPHIsPerLoad)) 1086 return false; 1087 1088 continue; 1089 } 1090 1091 // Otherwise we don't know what this is, not ok. 1092 return false; 1093 } 1094 1095 return true; 1096} 1097 1098 1099/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from 1100/// GV are simple enough to perform HeapSRA, return true. 1101static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, 1102 Instruction *StoredVal) { 1103 SmallPtrSet<const PHINode*, 32> LoadUsingPHIs; 1104 SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad; 1105 for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); 1106 UI != E; ++UI) 1107 if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { 1108 if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, 1109 LoadUsingPHIsPerLoad)) 1110 return false; 1111 LoadUsingPHIsPerLoad.clear(); 1112 } 1113 1114 // If we reach here, we know that all uses of the loads and transitive uses 1115 // (through PHI nodes) are simple enough to transform. However, we don't know 1116 // that all inputs the to the PHI nodes are in the same equivalence sets. 1117 // Check to verify that all operands of the PHIs are either PHIS that can be 1118 // transformed, loads from GV, or MI itself. 1119 for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin() 1120 , E = LoadUsingPHIs.end(); I != E; ++I) { 1121 const PHINode *PN = *I; 1122 for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { 1123 Value *InVal = PN->getIncomingValue(op); 1124 1125 // PHI of the stored value itself is ok. 1126 if (InVal == StoredVal) continue; 1127 1128 if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) { 1129 // One of the PHIs in our set is (optimistically) ok. 1130 if (LoadUsingPHIs.count(InPN)) 1131 continue; 1132 return false; 1133 } 1134 1135 // Load from GV is ok. 1136 if (const LoadInst *LI = dyn_cast<LoadInst>(InVal)) 1137 if (LI->getOperand(0) == GV) 1138 continue; 1139 1140 // UNDEF? NULL? 1141 1142 // Anything else is rejected. 1143 return false; 1144 } 1145 } 1146 1147 return true; 1148} 1149 1150static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, 1151 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1152 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { 1153 std::vector<Value*> &FieldVals = InsertedScalarizedValues[V]; 1154 1155 if (FieldNo >= FieldVals.size()) 1156 FieldVals.resize(FieldNo+1); 1157 1158 // If we already have this value, just reuse the previously scalarized 1159 // version. 1160 if (Value *FieldVal = FieldVals[FieldNo]) 1161 return FieldVal; 1162 1163 // Depending on what instruction this is, we have several cases. 1164 Value *Result; 1165 if (LoadInst *LI = dyn_cast<LoadInst>(V)) { 1166 // This is a scalarized version of the load from the global. Just create 1167 // a new Load of the scalarized global. 1168 Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, 1169 InsertedScalarizedValues, 1170 PHIsToRewrite), 1171 LI->getName()+".f"+Twine(FieldNo), LI); 1172 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 1173 // PN's type is pointer to struct. Make a new PHI of pointer to struct 1174 // field. 1175 const StructType *ST = 1176 cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); 1177 1178 Result = 1179 PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), 1180 PN->getName()+".f"+Twine(FieldNo), PN); 1181 PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); 1182 } else { 1183 llvm_unreachable("Unknown usable value"); 1184 Result = 0; 1185 } 1186 1187 return FieldVals[FieldNo] = Result; 1188} 1189 1190/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from 1191/// the load, rewrite the derived value to use the HeapSRoA'd load. 1192static void RewriteHeapSROALoadUser(Instruction *LoadUser, 1193 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1194 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { 1195 // If this is a comparison against null, handle it. 1196 if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) { 1197 assert(isa<ConstantPointerNull>(SCI->getOperand(1))); 1198 // If we have a setcc of the loaded pointer, we can use a setcc of any 1199 // field. 1200 Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, 1201 InsertedScalarizedValues, PHIsToRewrite); 1202 1203 Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, 1204 Constant::getNullValue(NPtr->getType()), 1205 SCI->getName()); 1206 SCI->replaceAllUsesWith(New); 1207 SCI->eraseFromParent(); 1208 return; 1209 } 1210 1211 // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' 1212 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) { 1213 assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) 1214 && "Unexpected GEPI!"); 1215 1216 // Load the pointer for this field. 1217 unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); 1218 Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, 1219 InsertedScalarizedValues, PHIsToRewrite); 1220 1221 // Create the new GEP idx vector. 1222 SmallVector<Value*, 8> GEPIdx; 1223 GEPIdx.push_back(GEPI->getOperand(1)); 1224 GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); 1225 1226 Value *NGEPI = GetElementPtrInst::Create(NewPtr, 1227 GEPIdx.begin(), GEPIdx.end(), 1228 GEPI->getName(), GEPI); 1229 GEPI->replaceAllUsesWith(NGEPI); 1230 GEPI->eraseFromParent(); 1231 return; 1232 } 1233 1234 // Recursively transform the users of PHI nodes. This will lazily create the 1235 // PHIs that are needed for individual elements. Keep track of what PHIs we 1236 // see in InsertedScalarizedValues so that we don't get infinite loops (very 1237 // antisocial). If the PHI is already in InsertedScalarizedValues, it has 1238 // already been seen first by another load, so its uses have already been 1239 // processed. 1240 PHINode *PN = cast<PHINode>(LoadUser); 1241 bool Inserted; 1242 DenseMap<Value*, std::vector<Value*> >::iterator InsertPos; 1243 tie(InsertPos, Inserted) = 1244 InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>())); 1245 if (!Inserted) return; 1246 1247 // If this is the first time we've seen this PHI, recursively process all 1248 // users. 1249 for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { 1250 Instruction *User = cast<Instruction>(*UI++); 1251 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); 1252 } 1253} 1254 1255/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr 1256/// is a value loaded from the global. Eliminate all uses of Ptr, making them 1257/// use FieldGlobals instead. All uses of loaded values satisfy 1258/// AllGlobalLoadUsesSimpleEnoughForHeapSRA. 1259static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, 1260 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1261 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { 1262 for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); 1263 UI != E; ) { 1264 Instruction *User = cast<Instruction>(*UI++); 1265 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); 1266 } 1267 1268 if (Load->use_empty()) { 1269 Load->eraseFromParent(); 1270 InsertedScalarizedValues.erase(Load); 1271 } 1272} 1273 1274/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break 1275/// it up into multiple allocations of arrays of the fields. 1276static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, 1277 Value* NElems, TargetData *TD) { 1278 DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); 1279 const Type* MAT = getMallocAllocatedType(CI); 1280 const StructType *STy = cast<StructType>(MAT); 1281 1282 // There is guaranteed to be at least one use of the malloc (storing 1283 // it into GV). If there are other uses, change them to be uses of 1284 // the global to simplify later code. This also deletes the store 1285 // into GV. 1286 ReplaceUsesOfMallocWithGlobal(CI, GV); 1287 1288 // Okay, at this point, there are no users of the malloc. Insert N 1289 // new mallocs at the same place as CI, and N globals. 1290 std::vector<Value*> FieldGlobals; 1291 std::vector<Value*> FieldMallocs; 1292 1293 for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ 1294 const Type *FieldTy = STy->getElementType(FieldNo); 1295 const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); 1296 1297 GlobalVariable *NGV = 1298 new GlobalVariable(*GV->getParent(), 1299 PFieldTy, false, GlobalValue::InternalLinkage, 1300 Constant::getNullValue(PFieldTy), 1301 GV->getName() + ".f" + Twine(FieldNo), GV, 1302 GV->isThreadLocal()); 1303 FieldGlobals.push_back(NGV); 1304 1305 unsigned TypeSize = TD->getTypeAllocSize(FieldTy); 1306 if (const StructType *ST = dyn_cast<StructType>(FieldTy)) 1307 TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); 1308 const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); 1309 Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, 1310 ConstantInt::get(IntPtrTy, TypeSize), 1311 NElems, 0, 1312 CI->getName() + ".f" + Twine(FieldNo)); 1313 FieldMallocs.push_back(NMI); 1314 new StoreInst(NMI, NGV, CI); 1315 } 1316 1317 // The tricky aspect of this transformation is handling the case when malloc 1318 // fails. In the original code, malloc failing would set the result pointer 1319 // of malloc to null. In this case, some mallocs could succeed and others 1320 // could fail. As such, we emit code that looks like this: 1321 // F0 = malloc(field0) 1322 // F1 = malloc(field1) 1323 // F2 = malloc(field2) 1324 // if (F0 == 0 || F1 == 0 || F2 == 0) { 1325 // if (F0) { free(F0); F0 = 0; } 1326 // if (F1) { free(F1); F1 = 0; } 1327 // if (F2) { free(F2); F2 = 0; } 1328 // } 1329 // The malloc can also fail if its argument is too large. 1330 Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); 1331 Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), 1332 ConstantZero, "isneg"); 1333 for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { 1334 Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], 1335 Constant::getNullValue(FieldMallocs[i]->getType()), 1336 "isnull"); 1337 RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); 1338 } 1339 1340 // Split the basic block at the old malloc. 1341 BasicBlock *OrigBB = CI->getParent(); 1342 BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); 1343 1344 // Create the block to check the first condition. Put all these blocks at the 1345 // end of the function as they are unlikely to be executed. 1346 BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), 1347 "malloc_ret_null", 1348 OrigBB->getParent()); 1349 1350 // Remove the uncond branch from OrigBB to ContBB, turning it into a cond 1351 // branch on RunningOr. 1352 OrigBB->getTerminator()->eraseFromParent(); 1353 BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); 1354 1355 // Within the NullPtrBlock, we need to emit a comparison and branch for each 1356 // pointer, because some may be null while others are not. 1357 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { 1358 Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); 1359 Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, 1360 Constant::getNullValue(GVVal->getType()), 1361 "tmp"); 1362 BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", 1363 OrigBB->getParent()); 1364 BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", 1365 OrigBB->getParent()); 1366 Instruction *BI = BranchInst::Create(FreeBlock, NextBlock, 1367 Cmp, NullPtrBlock); 1368 1369 // Fill in FreeBlock. 1370 CallInst::CreateFree(GVVal, BI); 1371 new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], 1372 FreeBlock); 1373 BranchInst::Create(NextBlock, FreeBlock); 1374 1375 NullPtrBlock = NextBlock; 1376 } 1377 1378 BranchInst::Create(ContBB, NullPtrBlock); 1379 1380 // CI is no longer needed, remove it. 1381 CI->eraseFromParent(); 1382 1383 /// InsertedScalarizedLoads - As we process loads, if we can't immediately 1384 /// update all uses of the load, keep track of what scalarized loads are 1385 /// inserted for a given load. 1386 DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; 1387 InsertedScalarizedValues[GV] = FieldGlobals; 1388 1389 std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; 1390 1391 // Okay, the malloc site is completely handled. All of the uses of GV are now 1392 // loads, and all uses of those loads are simple. Rewrite them to use loads 1393 // of the per-field globals instead. 1394 for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { 1395 Instruction *User = cast<Instruction>(*UI++); 1396 1397 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1398 RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); 1399 continue; 1400 } 1401 1402 // Must be a store of null. 1403 StoreInst *SI = cast<StoreInst>(User); 1404 assert(isa<ConstantPointerNull>(SI->getOperand(0)) && 1405 "Unexpected heap-sra user!"); 1406 1407 // Insert a store of null into each global. 1408 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { 1409 const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); 1410 Constant *Null = Constant::getNullValue(PT->getElementType()); 1411 new StoreInst(Null, FieldGlobals[i], SI); 1412 } 1413 // Erase the original store. 1414 SI->eraseFromParent(); 1415 } 1416 1417 // While we have PHIs that are interesting to rewrite, do it. 1418 while (!PHIsToRewrite.empty()) { 1419 PHINode *PN = PHIsToRewrite.back().first; 1420 unsigned FieldNo = PHIsToRewrite.back().second; 1421 PHIsToRewrite.pop_back(); 1422 PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); 1423 assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); 1424 1425 // Add all the incoming values. This can materialize more phis. 1426 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1427 Value *InVal = PN->getIncomingValue(i); 1428 InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, 1429 PHIsToRewrite); 1430 FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); 1431 } 1432 } 1433 1434 // Drop all inter-phi links and any loads that made it this far. 1435 for (DenseMap<Value*, std::vector<Value*> >::iterator 1436 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); 1437 I != E; ++I) { 1438 if (PHINode *PN = dyn_cast<PHINode>(I->first)) 1439 PN->dropAllReferences(); 1440 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) 1441 LI->dropAllReferences(); 1442 } 1443 1444 // Delete all the phis and loads now that inter-references are dead. 1445 for (DenseMap<Value*, std::vector<Value*> >::iterator 1446 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); 1447 I != E; ++I) { 1448 if (PHINode *PN = dyn_cast<PHINode>(I->first)) 1449 PN->eraseFromParent(); 1450 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) 1451 LI->eraseFromParent(); 1452 } 1453 1454 // The old global is now dead, remove it. 1455 GV->eraseFromParent(); 1456 1457 ++NumHeapSRA; 1458 return cast<GlobalVariable>(FieldGlobals[0]); 1459} 1460 1461/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a 1462/// pointer global variable with a single value stored it that is a malloc or 1463/// cast of malloc. 1464static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, 1465 CallInst *CI, 1466 const Type *AllocTy, 1467 Module::global_iterator &GVI, 1468 TargetData *TD) { 1469 if (!TD) 1470 return false; 1471 1472 // If this is a malloc of an abstract type, don't touch it. 1473 if (!AllocTy->isSized()) 1474 return false; 1475 1476 // We can't optimize this global unless all uses of it are *known* to be 1477 // of the malloc value, not of the null initializer value (consider a use 1478 // that compares the global's value against zero to see if the malloc has 1479 // been reached). To do this, we check to see if all uses of the global 1480 // would trap if the global were null: this proves that they must all 1481 // happen after the malloc. 1482 if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) 1483 return false; 1484 1485 // We can't optimize this if the malloc itself is used in a complex way, 1486 // for example, being stored into multiple globals. This allows the 1487 // malloc to be stored into the specified global, loaded setcc'd, and 1488 // GEP'd. These are all things we could transform to using the global 1489 // for. 1490 SmallPtrSet<const PHINode*, 8> PHIs; 1491 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) 1492 return false; 1493 1494 // If we have a global that is only initialized with a fixed size malloc, 1495 // transform the program to use global memory instead of malloc'd memory. 1496 // This eliminates dynamic allocation, avoids an indirection accessing the 1497 // data, and exposes the resultant global to further GlobalOpt. 1498 // We cannot optimize the malloc if we cannot determine malloc array size. 1499 Value *NElems = getMallocArraySize(CI, TD, true); 1500 if (!NElems) 1501 return false; 1502 1503 if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) 1504 // Restrict this transformation to only working on small allocations 1505 // (2048 bytes currently), as we don't want to introduce a 16M global or 1506 // something. 1507 if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { 1508 GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); 1509 return true; 1510 } 1511 1512 // If the allocation is an array of structures, consider transforming this 1513 // into multiple malloc'd arrays, one for each field. This is basically 1514 // SRoA for malloc'd memory. 1515 1516 // If this is an allocation of a fixed size array of structs, analyze as a 1517 // variable size array. malloc [100 x struct],1 -> malloc struct, 100 1518 if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) 1519 if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) 1520 AllocTy = AT->getElementType(); 1521 1522 const StructType *AllocSTy = dyn_cast<StructType>(AllocTy); 1523 if (!AllocSTy) 1524 return false; 1525 1526 // This the structure has an unreasonable number of fields, leave it 1527 // alone. 1528 if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && 1529 AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { 1530 1531 // If this is a fixed size array, transform the Malloc to be an alloc of 1532 // structs. malloc [100 x struct],1 -> malloc struct, 100 1533 if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { 1534 const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); 1535 unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); 1536 Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); 1537 Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); 1538 Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, 1539 AllocSize, NumElements, 1540 0, CI->getName()); 1541 Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); 1542 CI->replaceAllUsesWith(Cast); 1543 CI->eraseFromParent(); 1544 CI = dyn_cast<BitCastInst>(Malloc) ? 1545 extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); 1546 } 1547 1548 GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); 1549 return true; 1550 } 1551 1552 return false; 1553} 1554 1555// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge 1556// that only one value (besides its initializer) is ever stored to the global. 1557static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, 1558 Module::global_iterator &GVI, 1559 TargetData *TD) { 1560 // Ignore no-op GEPs and bitcasts. 1561 StoredOnceVal = StoredOnceVal->stripPointerCasts(); 1562 1563 // If we are dealing with a pointer global that is initialized to null and 1564 // only has one (non-null) value stored into it, then we can optimize any 1565 // users of the loaded value (often calls and loads) that would trap if the 1566 // value was null. 1567 if (GV->getInitializer()->getType()->isPointerTy() && 1568 GV->getInitializer()->isNullValue()) { 1569 if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { 1570 if (GV->getInitializer()->getType() != SOVC->getType()) 1571 SOVC = 1572 ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); 1573 1574 // Optimize away any trapping uses of the loaded value. 1575 if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) 1576 return true; 1577 } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { 1578 const Type* MallocType = getMallocAllocatedType(CI); 1579 if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, 1580 GVI, TD)) 1581 return true; 1582 } 1583 } 1584 1585 return false; 1586} 1587 1588/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only 1589/// two values ever stored into GV are its initializer and OtherVal. See if we 1590/// can shrink the global into a boolean and select between the two values 1591/// whenever it is used. This exposes the values to other scalar optimizations. 1592static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { 1593 const Type *GVElType = GV->getType()->getElementType(); 1594 1595 // If GVElType is already i1, it is already shrunk. If the type of the GV is 1596 // an FP value, pointer or vector, don't do this optimization because a select 1597 // between them is very expensive and unlikely to lead to later 1598 // simplification. In these cases, we typically end up with "cond ? v1 : v2" 1599 // where v1 and v2 both require constant pool loads, a big loss. 1600 if (GVElType == Type::getInt1Ty(GV->getContext()) || 1601 GVElType->isFloatingPointTy() || 1602 GVElType->isPointerTy() || GVElType->isVectorTy()) 1603 return false; 1604 1605 // Walk the use list of the global seeing if all the uses are load or store. 1606 // If there is anything else, bail out. 1607 for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ 1608 User *U = *I; 1609 if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) 1610 return false; 1611 } 1612 1613 DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); 1614 1615 // Create the new global, initializing it to false. 1616 GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), 1617 false, 1618 GlobalValue::InternalLinkage, 1619 ConstantInt::getFalse(GV->getContext()), 1620 GV->getName()+".b", 1621 GV->isThreadLocal()); 1622 GV->getParent()->getGlobalList().insert(GV, NewGV); 1623 1624 Constant *InitVal = GV->getInitializer(); 1625 assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && 1626 "No reason to shrink to bool!"); 1627 1628 // If initialized to zero and storing one into the global, we can use a cast 1629 // instead of a select to synthesize the desired value. 1630 bool IsOneZero = false; 1631 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) 1632 IsOneZero = InitVal->isNullValue() && CI->isOne(); 1633 1634 while (!GV->use_empty()) { 1635 Instruction *UI = cast<Instruction>(GV->use_back()); 1636 if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { 1637 // Change the store into a boolean store. 1638 bool StoringOther = SI->getOperand(0) == OtherVal; 1639 // Only do this if we weren't storing a loaded value. 1640 Value *StoreVal; 1641 if (StoringOther || SI->getOperand(0) == InitVal) 1642 StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), 1643 StoringOther); 1644 else { 1645 // Otherwise, we are storing a previously loaded copy. To do this, 1646 // change the copy from copying the original value to just copying the 1647 // bool. 1648 Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); 1649 1650 // If we've already replaced the input, StoredVal will be a cast or 1651 // select instruction. If not, it will be a load of the original 1652 // global. 1653 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { 1654 assert(LI->getOperand(0) == GV && "Not a copy!"); 1655 // Insert a new load, to preserve the saved value. 1656 StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI); 1657 } else { 1658 assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && 1659 "This is not a form that we understand!"); 1660 StoreVal = StoredVal->getOperand(0); 1661 assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); 1662 } 1663 } 1664 new StoreInst(StoreVal, NewGV, SI); 1665 } else { 1666 // Change the load into a load of bool then a select. 1667 LoadInst *LI = cast<LoadInst>(UI); 1668 LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI); 1669 Value *NSI; 1670 if (IsOneZero) 1671 NSI = new ZExtInst(NLI, LI->getType(), "", LI); 1672 else 1673 NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI); 1674 NSI->takeName(LI); 1675 LI->replaceAllUsesWith(NSI); 1676 } 1677 UI->eraseFromParent(); 1678 } 1679 1680 GV->eraseFromParent(); 1681 return true; 1682} 1683 1684 1685/// ProcessInternalGlobal - Analyze the specified global variable and optimize 1686/// it if possible. If we make a change, return true. 1687bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, 1688 Module::global_iterator &GVI) { 1689 SmallPtrSet<const PHINode*, 16> PHIUsers; 1690 GlobalStatus GS; 1691 GV->removeDeadConstantUsers(); 1692 1693 if (GV->use_empty()) { 1694 DEBUG(dbgs() << "GLOBAL DEAD: " << *GV); 1695 GV->eraseFromParent(); 1696 ++NumDeleted; 1697 return true; 1698 } 1699 1700 if (!AnalyzeGlobal(GV, GS, PHIUsers)) { 1701#if 0 1702 DEBUG(dbgs() << "Global: " << *GV); 1703 DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n"); 1704 DEBUG(dbgs() << " StoredType = "); 1705 switch (GS.StoredType) { 1706 case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break; 1707 case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n"); 1708 break; 1709 case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break; 1710 case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break; 1711 } 1712 if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) 1713 DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); 1714 if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) 1715 DEBUG(dbgs() << " AccessingFunction = " 1716 << GS.AccessingFunction->getName() << "\n"); 1717 DEBUG(dbgs() << " HasMultipleAccessingFunctions = " 1718 << GS.HasMultipleAccessingFunctions << "\n"); 1719 DEBUG(dbgs() << " HasNonInstructionUser = " 1720 << GS.HasNonInstructionUser<<"\n"); 1721 DEBUG(dbgs() << "\n"); 1722#endif 1723 1724 // If this is a first class global and has only one accessing function 1725 // and this function is main (which we know is not recursive we can make 1726 // this global a local variable) we replace the global with a local alloca 1727 // in this function. 1728 // 1729 // NOTE: It doesn't make sense to promote non single-value types since we 1730 // are just replacing static memory to stack memory. 1731 // 1732 // If the global is in different address space, don't bring it to stack. 1733 if (!GS.HasMultipleAccessingFunctions && 1734 GS.AccessingFunction && !GS.HasNonInstructionUser && 1735 GV->getType()->getElementType()->isSingleValueType() && 1736 GS.AccessingFunction->getName() == "main" && 1737 GS.AccessingFunction->hasExternalLinkage() && 1738 GV->getType()->getAddressSpace() == 0) { 1739 DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); 1740 Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction 1741 ->getEntryBlock().begin()); 1742 const Type* ElemTy = GV->getType()->getElementType(); 1743 // FIXME: Pass Global's alignment when globals have alignment 1744 AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); 1745 if (!isa<UndefValue>(GV->getInitializer())) 1746 new StoreInst(GV->getInitializer(), Alloca, &FirstI); 1747 1748 GV->replaceAllUsesWith(Alloca); 1749 GV->eraseFromParent(); 1750 ++NumLocalized; 1751 return true; 1752 } 1753 1754 // If the global is never loaded (but may be stored to), it is dead. 1755 // Delete it now. 1756 if (!GS.isLoaded) { 1757 DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); 1758 1759 // Delete any stores we can find to the global. We may not be able to 1760 // make it completely dead though. 1761 bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); 1762 1763 // If the global is dead now, delete it. 1764 if (GV->use_empty()) { 1765 GV->eraseFromParent(); 1766 ++NumDeleted; 1767 Changed = true; 1768 } 1769 return Changed; 1770 1771 } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { 1772 DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); 1773 GV->setConstant(true); 1774 1775 // Clean up any obviously simplifiable users now. 1776 CleanupConstantGlobalUsers(GV, GV->getInitializer()); 1777 1778 // If the global is dead now, just nuke it. 1779 if (GV->use_empty()) { 1780 DEBUG(dbgs() << " *** Marking constant allowed us to simplify " 1781 << "all users and delete global!\n"); 1782 GV->eraseFromParent(); 1783 ++NumDeleted; 1784 } 1785 1786 ++NumMarked; 1787 return true; 1788 } else if (!GV->getInitializer()->getType()->isSingleValueType()) { 1789 if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) 1790 if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { 1791 GVI = FirstNewGV; // Don't skip the newly produced globals! 1792 return true; 1793 } 1794 } else if (GS.StoredType == GlobalStatus::isStoredOnce) { 1795 // If the initial value for the global was an undef value, and if only 1796 // one other value was stored into it, we can just change the 1797 // initializer to be the stored value, then delete all stores to the 1798 // global. This allows us to mark it constant. 1799 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) 1800 if (isa<UndefValue>(GV->getInitializer())) { 1801 // Change the initial value here. 1802 GV->setInitializer(SOVConstant); 1803 1804 // Clean up any obviously simplifiable users now. 1805 CleanupConstantGlobalUsers(GV, GV->getInitializer()); 1806 1807 if (GV->use_empty()) { 1808 DEBUG(dbgs() << " *** Substituting initializer allowed us to " 1809 << "simplify all users and delete global!\n"); 1810 GV->eraseFromParent(); 1811 ++NumDeleted; 1812 } else { 1813 GVI = GV; 1814 } 1815 ++NumSubstitute; 1816 return true; 1817 } 1818 1819 // Try to optimize globals based on the knowledge that only one value 1820 // (besides its initializer) is ever stored to the global. 1821 if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, 1822 getAnalysisIfAvailable<TargetData>())) 1823 return true; 1824 1825 // Otherwise, if the global was not a boolean, we can shrink it to be a 1826 // boolean. 1827 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) 1828 if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { 1829 ++NumShrunkToBool; 1830 return true; 1831 } 1832 } 1833 } 1834 return false; 1835} 1836 1837/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified 1838/// function, changing them to FastCC. 1839static void ChangeCalleesToFastCall(Function *F) { 1840 for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ 1841 CallSite User(cast<Instruction>(*UI)); 1842 User.setCallingConv(CallingConv::Fast); 1843 } 1844} 1845 1846static AttrListPtr StripNest(const AttrListPtr &Attrs) { 1847 for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { 1848 if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0) 1849 continue; 1850 1851 // There can be only one. 1852 return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest); 1853 } 1854 1855 return Attrs; 1856} 1857 1858static void RemoveNestAttribute(Function *F) { 1859 F->setAttributes(StripNest(F->getAttributes())); 1860 for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ 1861 CallSite User(cast<Instruction>(*UI)); 1862 User.setAttributes(StripNest(User.getAttributes())); 1863 } 1864} 1865 1866bool GlobalOpt::OptimizeFunctions(Module &M) { 1867 bool Changed = false; 1868 // Optimize functions. 1869 for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { 1870 Function *F = FI++; 1871 // Functions without names cannot be referenced outside this module. 1872 if (!F->hasName() && !F->isDeclaration()) 1873 F->setLinkage(GlobalValue::InternalLinkage); 1874 F->removeDeadConstantUsers(); 1875 if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) { 1876 F->eraseFromParent(); 1877 Changed = true; 1878 ++NumFnDeleted; 1879 } else if (F->hasLocalLinkage()) { 1880 if (F->getCallingConv() == CallingConv::C && !F->isVarArg() && 1881 !F->hasAddressTaken()) { 1882 // If this function has C calling conventions, is not a varargs 1883 // function, and is only called directly, promote it to use the Fast 1884 // calling convention. 1885 F->setCallingConv(CallingConv::Fast); 1886 ChangeCalleesToFastCall(F); 1887 ++NumFastCallFns; 1888 Changed = true; 1889 } 1890 1891 if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && 1892 !F->hasAddressTaken()) { 1893 // The function is not used by a trampoline intrinsic, so it is safe 1894 // to remove the 'nest' attribute. 1895 RemoveNestAttribute(F); 1896 ++NumNestRemoved; 1897 Changed = true; 1898 } 1899 } 1900 } 1901 return Changed; 1902} 1903 1904bool GlobalOpt::OptimizeGlobalVars(Module &M) { 1905 bool Changed = false; 1906 for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); 1907 GVI != E; ) { 1908 GlobalVariable *GV = GVI++; 1909 // Global variables without names cannot be referenced outside this module. 1910 if (!GV->hasName() && !GV->isDeclaration()) 1911 GV->setLinkage(GlobalValue::InternalLinkage); 1912 // Simplify the initializer. 1913 if (GV->hasInitializer()) 1914 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { 1915 TargetData *TD = getAnalysisIfAvailable<TargetData>(); 1916 Constant *New = ConstantFoldConstantExpression(CE, TD); 1917 if (New && New != CE) 1918 GV->setInitializer(New); 1919 } 1920 // Do more involved optimizations if the global is internal. 1921 if (!GV->isConstant() && GV->hasLocalLinkage() && 1922 GV->hasInitializer()) 1923 Changed |= ProcessInternalGlobal(GV, GVI); 1924 } 1925 return Changed; 1926} 1927 1928/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all 1929/// initializers have an init priority of 65535. 1930GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { 1931 for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 1932 I != E; ++I) 1933 if (I->getName() == "llvm.global_ctors") { 1934 // Found it, verify it's an array of { int, void()* }. 1935 const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType()); 1936 if (!ATy) return 0; 1937 const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); 1938 if (!STy || STy->getNumElements() != 2 || 1939 !STy->getElementType(0)->isIntegerTy(32)) return 0; 1940 const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); 1941 if (!PFTy) return 0; 1942 const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); 1943 if (!FTy || !FTy->getReturnType()->isVoidTy() || 1944 FTy->isVarArg() || FTy->getNumParams() != 0) 1945 return 0; 1946 1947 // Verify that the initializer is simple enough for us to handle. 1948 if (!I->hasDefinitiveInitializer()) return 0; 1949 ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer()); 1950 if (!CA) return 0; 1951 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) 1952 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) { 1953 if (isa<ConstantPointerNull>(CS->getOperand(1))) 1954 continue; 1955 1956 // Must have a function or null ptr. 1957 if (!isa<Function>(CS->getOperand(1))) 1958 return 0; 1959 1960 // Init priority must be standard. 1961 ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); 1962 if (!CI || CI->getZExtValue() != 65535) 1963 return 0; 1964 } else { 1965 return 0; 1966 } 1967 1968 return I; 1969 } 1970 return 0; 1971} 1972 1973/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, 1974/// return a list of the functions and null terminator as a vector. 1975static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { 1976 ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); 1977 std::vector<Function*> Result; 1978 Result.reserve(CA->getNumOperands()); 1979 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { 1980 ConstantStruct *CS = cast<ConstantStruct>(*i); 1981 Result.push_back(dyn_cast<Function>(CS->getOperand(1))); 1982 } 1983 return Result; 1984} 1985 1986/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the 1987/// specified array, returning the new global to use. 1988static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, 1989 const std::vector<Function*> &Ctors) { 1990 // If we made a change, reassemble the initializer list. 1991 std::vector<Constant*> CSVals; 1992 CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535)); 1993 CSVals.push_back(0); 1994 1995 // Create the new init list. 1996 std::vector<Constant*> CAList; 1997 for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { 1998 if (Ctors[i]) { 1999 CSVals[1] = Ctors[i]; 2000 } else { 2001 const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), 2002 false); 2003 const PointerType *PFTy = PointerType::getUnqual(FTy); 2004 CSVals[1] = Constant::getNullValue(PFTy); 2005 CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 2006 2147483647); 2007 } 2008 CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); 2009 } 2010 2011 // Create the array initializer. 2012 const Type *StructTy = 2013 cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); 2014 Constant *CA = ConstantArray::get(ArrayType::get(StructTy, 2015 CAList.size()), CAList); 2016 2017 // If we didn't change the number of elements, don't create a new GV. 2018 if (CA->getType() == GCL->getInitializer()->getType()) { 2019 GCL->setInitializer(CA); 2020 return GCL; 2021 } 2022 2023 // Create the new global and insert it next to the existing list. 2024 GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), 2025 GCL->getLinkage(), CA, "", 2026 GCL->isThreadLocal()); 2027 GCL->getParent()->getGlobalList().insert(GCL, NGV); 2028 NGV->takeName(GCL); 2029 2030 // Nuke the old list, replacing any uses with the new one. 2031 if (!GCL->use_empty()) { 2032 Constant *V = NGV; 2033 if (V->getType() != GCL->getType()) 2034 V = ConstantExpr::getBitCast(V, GCL->getType()); 2035 GCL->replaceAllUsesWith(V); 2036 } 2037 GCL->eraseFromParent(); 2038 2039 if (Ctors.size()) 2040 return NGV; 2041 else 2042 return 0; 2043} 2044 2045 2046static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, 2047 Value *V) { 2048 if (Constant *CV = dyn_cast<Constant>(V)) return CV; 2049 Constant *R = ComputedValues[V]; 2050 assert(R && "Reference to an uncomputed value!"); 2051 return R; 2052} 2053 2054/// isSimpleEnoughPointerToCommit - Return true if this constant is simple 2055/// enough for us to understand. In particular, if it is a cast of something, 2056/// we punt. We basically just support direct accesses to globals and GEP's of 2057/// globals. This should be kept up to date with CommitValueTo. 2058static bool isSimpleEnoughPointerToCommit(Constant *C) { 2059 // Conservatively, avoid aggregate types. This is because we don't 2060 // want to worry about them partially overlapping other stores. 2061 if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) 2062 return false; 2063 2064 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) 2065 // Do not allow weak/linkonce/dllimport/dllexport linkage or 2066 // external globals. 2067 return GV->hasDefinitiveInitializer(); 2068 2069 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) 2070 // Handle a constantexpr gep. 2071 if (CE->getOpcode() == Instruction::GetElementPtr && 2072 isa<GlobalVariable>(CE->getOperand(0)) && 2073 cast<GEPOperator>(CE)->isInBounds()) { 2074 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2075 // Do not allow weak/linkonce/dllimport/dllexport linkage or 2076 // external globals. 2077 if (!GV->hasDefinitiveInitializer()) 2078 return false; 2079 2080 // The first index must be zero. 2081 ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin())); 2082 if (!CI || !CI->isZero()) return false; 2083 2084 // The remaining indices must be compile-time known integers within the 2085 // notional bounds of the corresponding static array types. 2086 if (!CE->isGEPWithNoNotionalOverIndexing()) 2087 return false; 2088 2089 return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); 2090 } 2091 return false; 2092} 2093 2094/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global 2095/// initializer. This returns 'Init' modified to reflect 'Val' stored into it. 2096/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. 2097static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, 2098 ConstantExpr *Addr, unsigned OpNo) { 2099 // Base case of the recursion. 2100 if (OpNo == Addr->getNumOperands()) { 2101 assert(Val->getType() == Init->getType() && "Type mismatch!"); 2102 return Val; 2103 } 2104 2105 std::vector<Constant*> Elts; 2106 if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { 2107 2108 // Break up the constant into its elements. 2109 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { 2110 for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i) 2111 Elts.push_back(cast<Constant>(*i)); 2112 } else if (isa<ConstantAggregateZero>(Init)) { 2113 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) 2114 Elts.push_back(Constant::getNullValue(STy->getElementType(i))); 2115 } else if (isa<UndefValue>(Init)) { 2116 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) 2117 Elts.push_back(UndefValue::get(STy->getElementType(i))); 2118 } else { 2119 llvm_unreachable("This code is out of sync with " 2120 " ConstantFoldLoadThroughGEPConstantExpr"); 2121 } 2122 2123 // Replace the element that we are supposed to. 2124 ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); 2125 unsigned Idx = CU->getZExtValue(); 2126 assert(Idx < STy->getNumElements() && "Struct index out of range!"); 2127 Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); 2128 2129 // Return the modified struct. 2130 return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(), 2131 STy->isPacked()); 2132 } else { 2133 ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); 2134 const SequentialType *InitTy = cast<SequentialType>(Init->getType()); 2135 2136 uint64_t NumElts; 2137 if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) 2138 NumElts = ATy->getNumElements(); 2139 else 2140 NumElts = cast<VectorType>(InitTy)->getNumElements(); 2141 2142 2143 // Break up the array into elements. 2144 if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { 2145 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) 2146 Elts.push_back(cast<Constant>(*i)); 2147 } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) { 2148 for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) 2149 Elts.push_back(cast<Constant>(*i)); 2150 } else if (isa<ConstantAggregateZero>(Init)) { 2151 Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); 2152 } else { 2153 assert(isa<UndefValue>(Init) && "This code is out of sync with " 2154 " ConstantFoldLoadThroughGEPConstantExpr"); 2155 Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); 2156 } 2157 2158 assert(CI->getZExtValue() < NumElts); 2159 Elts[CI->getZExtValue()] = 2160 EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); 2161 2162 if (Init->getType()->isArrayTy()) 2163 return ConstantArray::get(cast<ArrayType>(InitTy), Elts); 2164 else 2165 return ConstantVector::get(&Elts[0], Elts.size()); 2166 } 2167} 2168 2169/// CommitValueTo - We have decided that Addr (which satisfies the predicate 2170/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. 2171static void CommitValueTo(Constant *Val, Constant *Addr) { 2172 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 2173 assert(GV->hasInitializer()); 2174 GV->setInitializer(Val); 2175 return; 2176 } 2177 2178 ConstantExpr *CE = cast<ConstantExpr>(Addr); 2179 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2180 GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); 2181} 2182 2183/// ComputeLoadResult - Return the value that would be computed by a load from 2184/// P after the stores reflected by 'memory' have been performed. If we can't 2185/// decide, return null. 2186static Constant *ComputeLoadResult(Constant *P, 2187 const DenseMap<Constant*, Constant*> &Memory) { 2188 // If this memory location has been recently stored, use the stored value: it 2189 // is the most up-to-date. 2190 DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); 2191 if (I != Memory.end()) return I->second; 2192 2193 // Access it. 2194 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { 2195 if (GV->hasDefinitiveInitializer()) 2196 return GV->getInitializer(); 2197 return 0; 2198 } 2199 2200 // Handle a constantexpr getelementptr. 2201 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) 2202 if (CE->getOpcode() == Instruction::GetElementPtr && 2203 isa<GlobalVariable>(CE->getOperand(0))) { 2204 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2205 if (GV->hasDefinitiveInitializer()) 2206 return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); 2207 } 2208 2209 return 0; // don't know how to evaluate. 2210} 2211 2212/// EvaluateFunction - Evaluate a call to function F, returning true if 2213/// successful, false if we can't evaluate it. ActualArgs contains the formal 2214/// arguments for the function. 2215static bool EvaluateFunction(Function *F, Constant *&RetVal, 2216 const SmallVectorImpl<Constant*> &ActualArgs, 2217 std::vector<Function*> &CallStack, 2218 DenseMap<Constant*, Constant*> &MutatedMemory, 2219 std::vector<GlobalVariable*> &AllocaTmps) { 2220 // Check to see if this function is already executing (recursion). If so, 2221 // bail out. TODO: we might want to accept limited recursion. 2222 if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) 2223 return false; 2224 2225 CallStack.push_back(F); 2226 2227 /// Values - As we compute SSA register values, we store their contents here. 2228 DenseMap<Value*, Constant*> Values; 2229 2230 // Initialize arguments to the incoming values specified. 2231 unsigned ArgNo = 0; 2232 for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; 2233 ++AI, ++ArgNo) 2234 Values[AI] = ActualArgs[ArgNo]; 2235 2236 /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such, 2237 /// we can only evaluate any one basic block at most once. This set keeps 2238 /// track of what we have executed so we can detect recursive cases etc. 2239 SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; 2240 2241 // CurInst - The current instruction we're evaluating. 2242 BasicBlock::iterator CurInst = F->begin()->begin(); 2243 2244 // This is the main evaluation loop. 2245 while (1) { 2246 Constant *InstResult = 0; 2247 2248 if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { 2249 if (SI->isVolatile()) return false; // no volatile accesses. 2250 Constant *Ptr = getVal(Values, SI->getOperand(1)); 2251 if (!isSimpleEnoughPointerToCommit(Ptr)) 2252 // If this is too complex for us to commit, reject it. 2253 return false; 2254 Constant *Val = getVal(Values, SI->getOperand(0)); 2255 MutatedMemory[Ptr] = Val; 2256 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { 2257 InstResult = ConstantExpr::get(BO->getOpcode(), 2258 getVal(Values, BO->getOperand(0)), 2259 getVal(Values, BO->getOperand(1))); 2260 } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { 2261 InstResult = ConstantExpr::getCompare(CI->getPredicate(), 2262 getVal(Values, CI->getOperand(0)), 2263 getVal(Values, CI->getOperand(1))); 2264 } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { 2265 InstResult = ConstantExpr::getCast(CI->getOpcode(), 2266 getVal(Values, CI->getOperand(0)), 2267 CI->getType()); 2268 } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { 2269 InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), 2270 getVal(Values, SI->getOperand(1)), 2271 getVal(Values, SI->getOperand(2))); 2272 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { 2273 Constant *P = getVal(Values, GEP->getOperand(0)); 2274 SmallVector<Constant*, 8> GEPOps; 2275 for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); 2276 i != e; ++i) 2277 GEPOps.push_back(getVal(Values, *i)); 2278 InstResult = cast<GEPOperator>(GEP)->isInBounds() ? 2279 ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) : 2280 ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); 2281 } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { 2282 if (LI->isVolatile()) return false; // no volatile accesses. 2283 InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), 2284 MutatedMemory); 2285 if (InstResult == 0) return false; // Could not evaluate load. 2286 } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { 2287 if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. 2288 const Type *Ty = AI->getType()->getElementType(); 2289 AllocaTmps.push_back(new GlobalVariable(Ty, false, 2290 GlobalValue::InternalLinkage, 2291 UndefValue::get(Ty), 2292 AI->getName())); 2293 InstResult = AllocaTmps.back(); 2294 } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) { 2295 2296 // Debug info can safely be ignored here. 2297 if (isa<DbgInfoIntrinsic>(CI)) { 2298 ++CurInst; 2299 continue; 2300 } 2301 2302 // Cannot handle inline asm. 2303 if (isa<InlineAsm>(CI->getCalledValue())) return false; 2304 2305 // Resolve function pointers. 2306 Function *Callee = dyn_cast<Function>(getVal(Values, 2307 CI->getCalledValue())); 2308 if (!Callee) return false; // Cannot resolve. 2309 2310 SmallVector<Constant*, 8> Formals; 2311 CallSite CS(CI); 2312 for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); 2313 i != e; ++i) 2314 Formals.push_back(getVal(Values, *i)); 2315 2316 if (Callee->isDeclaration()) { 2317 // If this is a function we can constant fold, do it. 2318 if (Constant *C = ConstantFoldCall(Callee, Formals.data(), 2319 Formals.size())) { 2320 InstResult = C; 2321 } else { 2322 return false; 2323 } 2324 } else { 2325 if (Callee->getFunctionType()->isVarArg()) 2326 return false; 2327 2328 Constant *RetVal; 2329 // Execute the call, if successful, use the return value. 2330 if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, 2331 MutatedMemory, AllocaTmps)) 2332 return false; 2333 InstResult = RetVal; 2334 } 2335 } else if (isa<TerminatorInst>(CurInst)) { 2336 BasicBlock *NewBB = 0; 2337 if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { 2338 if (BI->isUnconditional()) { 2339 NewBB = BI->getSuccessor(0); 2340 } else { 2341 ConstantInt *Cond = 2342 dyn_cast<ConstantInt>(getVal(Values, BI->getCondition())); 2343 if (!Cond) return false; // Cannot determine. 2344 2345 NewBB = BI->getSuccessor(!Cond->getZExtValue()); 2346 } 2347 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { 2348 ConstantInt *Val = 2349 dyn_cast<ConstantInt>(getVal(Values, SI->getCondition())); 2350 if (!Val) return false; // Cannot determine. 2351 NewBB = SI->getSuccessor(SI->findCaseValue(Val)); 2352 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { 2353 Value *Val = getVal(Values, IBI->getAddress())->stripPointerCasts(); 2354 if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) 2355 NewBB = BA->getBasicBlock(); 2356 else 2357 return false; // Cannot determine. 2358 } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) { 2359 if (RI->getNumOperands()) 2360 RetVal = getVal(Values, RI->getOperand(0)); 2361 2362 CallStack.pop_back(); // return from fn. 2363 return true; // We succeeded at evaluating this ctor! 2364 } else { 2365 // invoke, unwind, unreachable. 2366 return false; // Cannot handle this terminator. 2367 } 2368 2369 // Okay, we succeeded in evaluating this control flow. See if we have 2370 // executed the new block before. If so, we have a looping function, 2371 // which we cannot evaluate in reasonable time. 2372 if (!ExecutedBlocks.insert(NewBB)) 2373 return false; // looped! 2374 2375 // Okay, we have never been in this block before. Check to see if there 2376 // are any PHI nodes. If so, evaluate them with information about where 2377 // we came from. 2378 BasicBlock *OldBB = CurInst->getParent(); 2379 CurInst = NewBB->begin(); 2380 PHINode *PN; 2381 for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) 2382 Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB)); 2383 2384 // Do NOT increment CurInst. We know that the terminator had no value. 2385 continue; 2386 } else { 2387 // Did not know how to evaluate this! 2388 return false; 2389 } 2390 2391 if (!CurInst->use_empty()) 2392 Values[CurInst] = InstResult; 2393 2394 // Advance program counter. 2395 ++CurInst; 2396 } 2397} 2398 2399/// EvaluateStaticConstructor - Evaluate static constructors in the function, if 2400/// we can. Return true if we can, false otherwise. 2401static bool EvaluateStaticConstructor(Function *F) { 2402 /// MutatedMemory - For each store we execute, we update this map. Loads 2403 /// check this to get the most up-to-date value. If evaluation is successful, 2404 /// this state is committed to the process. 2405 DenseMap<Constant*, Constant*> MutatedMemory; 2406 2407 /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable 2408 /// to represent its body. This vector is needed so we can delete the 2409 /// temporary globals when we are done. 2410 std::vector<GlobalVariable*> AllocaTmps; 2411 2412 /// CallStack - This is used to detect recursion. In pathological situations 2413 /// we could hit exponential behavior, but at least there is nothing 2414 /// unbounded. 2415 std::vector<Function*> CallStack; 2416 2417 // Call the function. 2418 Constant *RetValDummy; 2419 bool EvalSuccess = EvaluateFunction(F, RetValDummy, 2420 SmallVector<Constant*, 0>(), CallStack, 2421 MutatedMemory, AllocaTmps); 2422 if (EvalSuccess) { 2423 // We succeeded at evaluation: commit the result. 2424 DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" 2425 << F->getName() << "' to " << MutatedMemory.size() 2426 << " stores.\n"); 2427 for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(), 2428 E = MutatedMemory.end(); I != E; ++I) 2429 CommitValueTo(I->second, I->first); 2430 } 2431 2432 // At this point, we are done interpreting. If we created any 'alloca' 2433 // temporaries, release them now. 2434 while (!AllocaTmps.empty()) { 2435 GlobalVariable *Tmp = AllocaTmps.back(); 2436 AllocaTmps.pop_back(); 2437 2438 // If there are still users of the alloca, the program is doing something 2439 // silly, e.g. storing the address of the alloca somewhere and using it 2440 // later. Since this is undefined, we'll just make it be null. 2441 if (!Tmp->use_empty()) 2442 Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); 2443 delete Tmp; 2444 } 2445 2446 return EvalSuccess; 2447} 2448 2449 2450 2451/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. 2452/// Return true if anything changed. 2453bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { 2454 std::vector<Function*> Ctors = ParseGlobalCtors(GCL); 2455 bool MadeChange = false; 2456 if (Ctors.empty()) return false; 2457 2458 // Loop over global ctors, optimizing them when we can. 2459 for (unsigned i = 0; i != Ctors.size(); ++i) { 2460 Function *F = Ctors[i]; 2461 // Found a null terminator in the middle of the list, prune off the rest of 2462 // the list. 2463 if (F == 0) { 2464 if (i != Ctors.size()-1) { 2465 Ctors.resize(i+1); 2466 MadeChange = true; 2467 } 2468 break; 2469 } 2470 2471 // We cannot simplify external ctor functions. 2472 if (F->empty()) continue; 2473 2474 // If we can evaluate the ctor at compile time, do. 2475 if (EvaluateStaticConstructor(F)) { 2476 Ctors.erase(Ctors.begin()+i); 2477 MadeChange = true; 2478 --i; 2479 ++NumCtorsEvaluated; 2480 continue; 2481 } 2482 } 2483 2484 if (!MadeChange) return false; 2485 2486 GCL = InstallGlobalCtors(GCL, Ctors); 2487 return true; 2488} 2489 2490bool GlobalOpt::OptimizeGlobalAliases(Module &M) { 2491 bool Changed = false; 2492 2493 for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); 2494 I != E;) { 2495 Module::alias_iterator J = I++; 2496 // Aliases without names cannot be referenced outside this module. 2497 if (!J->hasName() && !J->isDeclaration()) 2498 J->setLinkage(GlobalValue::InternalLinkage); 2499 // If the aliasee may change at link time, nothing can be done - bail out. 2500 if (J->mayBeOverridden()) 2501 continue; 2502 2503 Constant *Aliasee = J->getAliasee(); 2504 GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); 2505 Target->removeDeadConstantUsers(); 2506 bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse(); 2507 2508 // Make all users of the alias use the aliasee instead. 2509 if (!J->use_empty()) { 2510 J->replaceAllUsesWith(Aliasee); 2511 ++NumAliasesResolved; 2512 Changed = true; 2513 } 2514 2515 // If the alias is externally visible, we may still be able to simplify it. 2516 if (!J->hasLocalLinkage()) { 2517 // If the aliasee has internal linkage, give it the name and linkage 2518 // of the alias, and delete the alias. This turns: 2519 // define internal ... @f(...) 2520 // @a = alias ... @f 2521 // into: 2522 // define ... @a(...) 2523 if (!Target->hasLocalLinkage()) 2524 continue; 2525 2526 // Do not perform the transform if multiple aliases potentially target the 2527 // aliasee. This check also ensures that it is safe to replace the section 2528 // and other attributes of the aliasee with those of the alias. 2529 if (!hasOneUse) 2530 continue; 2531 2532 // Give the aliasee the name, linkage and other attributes of the alias. 2533 Target->takeName(J); 2534 Target->setLinkage(J->getLinkage()); 2535 Target->GlobalValue::copyAttributesFrom(J); 2536 } 2537 2538 // Delete the alias. 2539 M.getAliasList().erase(J); 2540 ++NumAliasesRemoved; 2541 Changed = true; 2542 } 2543 2544 return Changed; 2545} 2546 2547bool GlobalOpt::runOnModule(Module &M) { 2548 bool Changed = false; 2549 2550 // Try to find the llvm.globalctors list. 2551 GlobalVariable *GlobalCtors = FindGlobalCtors(M); 2552 2553 bool LocalChange = true; 2554 while (LocalChange) { 2555 LocalChange = false; 2556 2557 // Delete functions that are trivially dead, ccc -> fastcc 2558 LocalChange |= OptimizeFunctions(M); 2559 2560 // Optimize global_ctors list. 2561 if (GlobalCtors) 2562 LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); 2563 2564 // Optimize non-address-taken globals. 2565 LocalChange |= OptimizeGlobalVars(M); 2566 2567 // Resolve aliases, when possible. 2568 LocalChange |= OptimizeGlobalAliases(M); 2569 Changed |= LocalChange; 2570 } 2571 2572 // TODO: Move all global ctors functions to the end of the module for code 2573 // layout. 2574 2575 return Changed; 2576} 2577