GlobalOpt.cpp revision 08600f0c51738348b30fc8f5bf118d8f8bea1ed5
1//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass transforms simple global variables that never have their address 11// taken. If obviously true, it marks read/write globals as constant, deletes 12// variables only stored to, etc. 13// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "globalopt" 17#include "llvm/Transforms/IPO.h" 18#include "llvm/CallingConv.h" 19#include "llvm/Constants.h" 20#include "llvm/DerivedTypes.h" 21#include "llvm/Instructions.h" 22#include "llvm/IntrinsicInst.h" 23#include "llvm/Module.h" 24#include "llvm/Pass.h" 25#include "llvm/Analysis/ConstantFolding.h" 26#include "llvm/Transforms/Utils/Local.h" 27#include "llvm/Target/TargetData.h" 28#include "llvm/Support/CallSite.h" 29#include "llvm/Support/Compiler.h" 30#include "llvm/Support/Debug.h" 31#include "llvm/Support/GetElementPtrTypeIterator.h" 32#include "llvm/Support/MathExtras.h" 33#include "llvm/ADT/DenseMap.h" 34#include "llvm/ADT/SmallPtrSet.h" 35#include "llvm/ADT/SmallVector.h" 36#include "llvm/ADT/Statistic.h" 37#include "llvm/ADT/StringExtras.h" 38#include "llvm/ADT/STLExtras.h" 39#include <algorithm> 40using namespace llvm; 41 42STATISTIC(NumMarked , "Number of globals marked constant"); 43STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); 44STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); 45STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); 46STATISTIC(NumDeleted , "Number of globals deleted"); 47STATISTIC(NumFnDeleted , "Number of functions deleted"); 48STATISTIC(NumGlobUses , "Number of global uses devirtualized"); 49STATISTIC(NumLocalized , "Number of globals localized"); 50STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); 51STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); 52STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); 53STATISTIC(NumNestRemoved , "Number of nest attributes removed"); 54STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); 55STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); 56 57namespace { 58 struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass { 59 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 60 AU.addRequired<TargetData>(); 61 } 62 static char ID; // Pass identification, replacement for typeid 63 GlobalOpt() : ModulePass(&ID) {} 64 65 bool runOnModule(Module &M); 66 67 private: 68 GlobalVariable *FindGlobalCtors(Module &M); 69 bool OptimizeFunctions(Module &M); 70 bool OptimizeGlobalVars(Module &M); 71 bool ResolveAliases(Module &M); 72 bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); 73 bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); 74 }; 75} 76 77char GlobalOpt::ID = 0; 78static RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer"); 79 80ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } 81 82namespace { 83 84/// GlobalStatus - As we analyze each global, keep track of some information 85/// about it. If we find out that the address of the global is taken, none of 86/// this info will be accurate. 87struct VISIBILITY_HIDDEN GlobalStatus { 88 /// isLoaded - True if the global is ever loaded. If the global isn't ever 89 /// loaded it can be deleted. 90 bool isLoaded; 91 92 /// StoredType - Keep track of what stores to the global look like. 93 /// 94 enum StoredType { 95 /// NotStored - There is no store to this global. It can thus be marked 96 /// constant. 97 NotStored, 98 99 /// isInitializerStored - This global is stored to, but the only thing 100 /// stored is the constant it was initialized with. This is only tracked 101 /// for scalar globals. 102 isInitializerStored, 103 104 /// isStoredOnce - This global is stored to, but only its initializer and 105 /// one other value is ever stored to it. If this global isStoredOnce, we 106 /// track the value stored to it in StoredOnceValue below. This is only 107 /// tracked for scalar globals. 108 isStoredOnce, 109 110 /// isStored - This global is stored to by multiple values or something else 111 /// that we cannot track. 112 isStored 113 } StoredType; 114 115 /// StoredOnceValue - If only one value (besides the initializer constant) is 116 /// ever stored to this global, keep track of what value it is. 117 Value *StoredOnceValue; 118 119 /// AccessingFunction/HasMultipleAccessingFunctions - These start out 120 /// null/false. When the first accessing function is noticed, it is recorded. 121 /// When a second different accessing function is noticed, 122 /// HasMultipleAccessingFunctions is set to true. 123 Function *AccessingFunction; 124 bool HasMultipleAccessingFunctions; 125 126 /// HasNonInstructionUser - Set to true if this global has a user that is not 127 /// an instruction (e.g. a constant expr or GV initializer). 128 bool HasNonInstructionUser; 129 130 /// HasPHIUser - Set to true if this global has a user that is a PHI node. 131 bool HasPHIUser; 132 133 GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), 134 AccessingFunction(0), HasMultipleAccessingFunctions(false), 135 HasNonInstructionUser(false), HasPHIUser(false) {} 136}; 137 138} 139 140/// ConstantIsDead - Return true if the specified constant is (transitively) 141/// dead. The constant may be used by other constants (e.g. constant arrays, 142/// constant exprs, constant global variables) as long as they are dead, 143/// but it cannot be used by anything else. If DeadGVs is not null then 144/// record dead constant GV users. 145static bool ConstantIsDead(Constant *C, 146 SmallPtrSet<GlobalVariable *, 4> *DeadGVs = false) { 147 if (isa<GlobalValue>(C)) return false; 148 149 for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) { 150 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*UI)) { 151 if (!GV->isConstant() || !GV->hasLocalLinkage() || !GV->use_empty()) 152 return false; 153 else { 154 if (DeadGVs) 155 DeadGVs->insert(GV); 156 } 157 } 158 else if (Constant *CU = dyn_cast<Constant>(*UI)) { 159 if (!ConstantIsDead(CU, DeadGVs)) return false; 160 } else 161 return false; 162 } 163 return true; 164} 165 166 167/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus 168/// structure. If the global has its address taken, return true to indicate we 169/// can't do anything with it. 170/// 171static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, 172 SmallPtrSet<PHINode*, 16> &PHIUsers) { 173 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) 174 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { 175 GS.HasNonInstructionUser = true; 176 177 if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; 178 179 } else if (Instruction *I = dyn_cast<Instruction>(*UI)) { 180 if (!GS.HasMultipleAccessingFunctions) { 181 Function *F = I->getParent()->getParent(); 182 if (GS.AccessingFunction == 0) 183 GS.AccessingFunction = F; 184 else if (GS.AccessingFunction != F) 185 GS.HasMultipleAccessingFunctions = true; 186 } 187 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 188 GS.isLoaded = true; 189 if (LI->isVolatile()) return true; // Don't hack on volatile loads. 190 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 191 // Don't allow a store OF the address, only stores TO the address. 192 if (SI->getOperand(0) == V) return true; 193 194 if (SI->isVolatile()) return true; // Don't hack on volatile stores. 195 196 // If this is a direct store to the global (i.e., the global is a scalar 197 // value, not an aggregate), keep more specific information about 198 // stores. 199 if (GS.StoredType != GlobalStatus::isStored) { 200 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){ 201 Value *StoredVal = SI->getOperand(0); 202 if (StoredVal == GV->getInitializer()) { 203 if (GS.StoredType < GlobalStatus::isInitializerStored) 204 GS.StoredType = GlobalStatus::isInitializerStored; 205 } else if (isa<LoadInst>(StoredVal) && 206 cast<LoadInst>(StoredVal)->getOperand(0) == GV) { 207 // G = G 208 if (GS.StoredType < GlobalStatus::isInitializerStored) 209 GS.StoredType = GlobalStatus::isInitializerStored; 210 } else if (GS.StoredType < GlobalStatus::isStoredOnce) { 211 GS.StoredType = GlobalStatus::isStoredOnce; 212 GS.StoredOnceValue = StoredVal; 213 } else if (GS.StoredType == GlobalStatus::isStoredOnce && 214 GS.StoredOnceValue == StoredVal) { 215 // noop. 216 } else { 217 GS.StoredType = GlobalStatus::isStored; 218 } 219 } else { 220 GS.StoredType = GlobalStatus::isStored; 221 } 222 } 223 } else if (isa<GetElementPtrInst>(I)) { 224 if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 225 } else if (isa<SelectInst>(I)) { 226 if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 227 } else if (PHINode *PN = dyn_cast<PHINode>(I)) { 228 // PHI nodes we can check just like select or GEP instructions, but we 229 // have to be careful about infinite recursion. 230 if (PHIUsers.insert(PN)) // Not already visited. 231 if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 232 GS.HasPHIUser = true; 233 } else if (isa<CmpInst>(I)) { 234 } else if (isa<MemCpyInst>(I) || isa<MemMoveInst>(I)) { 235 if (I->getOperand(1) == V) 236 GS.StoredType = GlobalStatus::isStored; 237 if (I->getOperand(2) == V) 238 GS.isLoaded = true; 239 } else if (isa<MemSetInst>(I)) { 240 assert(I->getOperand(1) == V && "Memset only takes one pointer!"); 241 GS.StoredType = GlobalStatus::isStored; 242 } else { 243 return true; // Any other non-load instruction might take address! 244 } 245 } else if (Constant *C = dyn_cast<Constant>(*UI)) { 246 GS.HasNonInstructionUser = true; 247 // We might have a dead and dangling constant hanging off of here. 248 if (!ConstantIsDead(C)) 249 return true; 250 } else { 251 GS.HasNonInstructionUser = true; 252 // Otherwise must be some other user. 253 return true; 254 } 255 256 return false; 257} 258 259static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { 260 ConstantInt *CI = dyn_cast<ConstantInt>(Idx); 261 if (!CI) return 0; 262 unsigned IdxV = CI->getZExtValue(); 263 264 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) { 265 if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV); 266 } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) { 267 if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV); 268 } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) { 269 if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); 270 } else if (isa<ConstantAggregateZero>(Agg)) { 271 if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { 272 if (IdxV < STy->getNumElements()) 273 return Constant::getNullValue(STy->getElementType(IdxV)); 274 } else if (const SequentialType *STy = 275 dyn_cast<SequentialType>(Agg->getType())) { 276 return Constant::getNullValue(STy->getElementType()); 277 } 278 } else if (isa<UndefValue>(Agg)) { 279 if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { 280 if (IdxV < STy->getNumElements()) 281 return UndefValue::get(STy->getElementType(IdxV)); 282 } else if (const SequentialType *STy = 283 dyn_cast<SequentialType>(Agg->getType())) { 284 return UndefValue::get(STy->getElementType()); 285 } 286 } 287 return 0; 288} 289 290 291/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all 292/// users of the global, cleaning up the obvious ones. This is largely just a 293/// quick scan over the use list to clean up the easy and obvious cruft. This 294/// returns true if it made a change. 295static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { 296 bool Changed = false; 297 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { 298 User *U = *UI++; 299 300 if (LoadInst *LI = dyn_cast<LoadInst>(U)) { 301 if (Init) { 302 // Replace the load with the initializer. 303 LI->replaceAllUsesWith(Init); 304 LI->eraseFromParent(); 305 Changed = true; 306 } 307 } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 308 // Store must be unreachable or storing Init into the global. 309 SI->eraseFromParent(); 310 Changed = true; 311 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { 312 if (CE->getOpcode() == Instruction::GetElementPtr) { 313 Constant *SubInit = 0; 314 if (Init) 315 SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); 316 Changed |= CleanupConstantGlobalUsers(CE, SubInit); 317 } else if (CE->getOpcode() == Instruction::BitCast && 318 isa<PointerType>(CE->getType())) { 319 // Pointer cast, delete any stores and memsets to the global. 320 Changed |= CleanupConstantGlobalUsers(CE, 0); 321 } 322 323 if (CE->use_empty()) { 324 CE->destroyConstant(); 325 Changed = true; 326 } 327 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { 328 // Do not transform "gepinst (gep constexpr (GV))" here, because forming 329 // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold 330 // and will invalidate our notion of what Init is. 331 Constant *SubInit = 0; 332 if (!isa<ConstantExpr>(GEP->getOperand(0))) { 333 ConstantExpr *CE = 334 dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); 335 if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) 336 SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); 337 } 338 Changed |= CleanupConstantGlobalUsers(GEP, SubInit); 339 340 if (GEP->use_empty()) { 341 GEP->eraseFromParent(); 342 Changed = true; 343 } 344 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv 345 if (MI->getRawDest() == V) { 346 MI->eraseFromParent(); 347 Changed = true; 348 } 349 350 } else if (Constant *C = dyn_cast<Constant>(U)) { 351 // If we have a chain of dead constantexprs or other things dangling from 352 // us, and if they are all dead, nuke them without remorse. 353 SmallPtrSet<GlobalVariable *, 4> DeadGVs; 354 if (ConstantIsDead(C, &DeadGVs)) { 355 for (SmallPtrSet<GlobalVariable *, 4>::iterator TI = DeadGVs.begin(), 356 TE = DeadGVs.end(); TI != TE; ) { 357 GlobalVariable *TGV = *TI; ++TI; 358 if (TGV == C) 359 C = NULL; 360 TGV->eraseFromParent(); 361 } 362 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) 363 GV->eraseFromParent(); 364 else if (C) 365 C->destroyConstant(); 366 // This could have invalidated UI, start over from scratch. 367 CleanupConstantGlobalUsers(V, Init); 368 return true; 369 } 370 } 371 } 372 return Changed; 373} 374 375/// isSafeSROAElementUse - Return true if the specified instruction is a safe 376/// user of a derived expression from a global that we want to SROA. 377static bool isSafeSROAElementUse(Value *V) { 378 // We might have a dead and dangling constant hanging off of here. 379 if (Constant *C = dyn_cast<Constant>(V)) 380 return ConstantIsDead(C); 381 382 Instruction *I = dyn_cast<Instruction>(V); 383 if (!I) return false; 384 385 // Loads are ok. 386 if (isa<LoadInst>(I)) return true; 387 388 // Stores *to* the pointer are ok. 389 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 390 return SI->getOperand(0) != V; 391 392 // Otherwise, it must be a GEP. 393 GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); 394 if (GEPI == 0) return false; 395 396 if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || 397 !cast<Constant>(GEPI->getOperand(1))->isNullValue()) 398 return false; 399 400 for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); 401 I != E; ++I) 402 if (!isSafeSROAElementUse(*I)) 403 return false; 404 return true; 405} 406 407 408/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value. 409/// Look at it and its uses and decide whether it is safe to SROA this global. 410/// 411static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { 412 // The user of the global must be a GEP Inst or a ConstantExpr GEP. 413 if (!isa<GetElementPtrInst>(U) && 414 (!isa<ConstantExpr>(U) || 415 cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) 416 return false; 417 418 // Check to see if this ConstantExpr GEP is SRA'able. In particular, we 419 // don't like < 3 operand CE's, and we don't like non-constant integer 420 // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some 421 // value of C. 422 if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || 423 !cast<Constant>(U->getOperand(1))->isNullValue() || 424 !isa<ConstantInt>(U->getOperand(2))) 425 return false; 426 427 gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); 428 ++GEPI; // Skip over the pointer index. 429 430 // If this is a use of an array allocation, do a bit more checking for sanity. 431 if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { 432 uint64_t NumElements = AT->getNumElements(); 433 ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); 434 435 // Check to make sure that index falls within the array. If not, 436 // something funny is going on, so we won't do the optimization. 437 // 438 if (Idx->getZExtValue() >= NumElements) 439 return false; 440 441 // We cannot scalar repl this level of the array unless any array 442 // sub-indices are in-range constants. In particular, consider: 443 // A[0][i]. We cannot know that the user isn't doing invalid things like 444 // allowing i to index an out-of-range subscript that accesses A[1]. 445 // 446 // Scalar replacing *just* the outer index of the array is probably not 447 // going to be a win anyway, so just give up. 448 for (++GEPI; // Skip array index. 449 GEPI != E && (isa<ArrayType>(*GEPI) || isa<VectorType>(*GEPI)); 450 ++GEPI) { 451 uint64_t NumElements; 452 if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) 453 NumElements = SubArrayTy->getNumElements(); 454 else 455 NumElements = cast<VectorType>(*GEPI)->getNumElements(); 456 457 ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); 458 if (!IdxVal || IdxVal->getZExtValue() >= NumElements) 459 return false; 460 } 461 } 462 463 for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) 464 if (!isSafeSROAElementUse(*I)) 465 return false; 466 return true; 467} 468 469/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it 470/// is safe for us to perform this transformation. 471/// 472static bool GlobalUsersSafeToSRA(GlobalValue *GV) { 473 for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); 474 UI != E; ++UI) { 475 if (!IsUserOfGlobalSafeForSRA(*UI, GV)) 476 return false; 477 } 478 return true; 479} 480 481 482/// SRAGlobal - Perform scalar replacement of aggregates on the specified global 483/// variable. This opens the door for other optimizations by exposing the 484/// behavior of the program in a more fine-grained way. We have determined that 485/// this transformation is safe already. We return the first global variable we 486/// insert so that the caller can reprocess it. 487static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { 488 // Make sure this global only has simple uses that we can SRA. 489 if (!GlobalUsersSafeToSRA(GV)) 490 return 0; 491 492 assert(GV->hasLocalLinkage() && !GV->isConstant()); 493 Constant *Init = GV->getInitializer(); 494 const Type *Ty = Init->getType(); 495 496 std::vector<GlobalVariable*> NewGlobals; 497 Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); 498 499 // Get the alignment of the global, either explicit or target-specific. 500 unsigned StartAlignment = GV->getAlignment(); 501 if (StartAlignment == 0) 502 StartAlignment = TD.getABITypeAlignment(GV->getType()); 503 504 if (const StructType *STy = dyn_cast<StructType>(Ty)) { 505 NewGlobals.reserve(STy->getNumElements()); 506 const StructLayout &Layout = *TD.getStructLayout(STy); 507 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 508 Constant *In = getAggregateConstantElement(Init, 509 ConstantInt::get(Type::Int32Ty, i)); 510 assert(In && "Couldn't get element of initializer?"); 511 GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, 512 GlobalVariable::InternalLinkage, 513 In, GV->getName()+"."+utostr(i), 514 (Module *)NULL, 515 GV->isThreadLocal(), 516 GV->getType()->getAddressSpace()); 517 Globals.insert(GV, NGV); 518 NewGlobals.push_back(NGV); 519 520 // Calculate the known alignment of the field. If the original aggregate 521 // had 256 byte alignment for example, something might depend on that: 522 // propagate info to each field. 523 uint64_t FieldOffset = Layout.getElementOffset(i); 524 unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); 525 if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) 526 NGV->setAlignment(NewAlign); 527 } 528 } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) { 529 unsigned NumElements = 0; 530 if (const ArrayType *ATy = dyn_cast<ArrayType>(STy)) 531 NumElements = ATy->getNumElements(); 532 else 533 NumElements = cast<VectorType>(STy)->getNumElements(); 534 535 if (NumElements > 16 && GV->hasNUsesOrMore(16)) 536 return 0; // It's not worth it. 537 NewGlobals.reserve(NumElements); 538 539 uint64_t EltSize = TD.getTypePaddedSize(STy->getElementType()); 540 unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); 541 for (unsigned i = 0, e = NumElements; i != e; ++i) { 542 Constant *In = getAggregateConstantElement(Init, 543 ConstantInt::get(Type::Int32Ty, i)); 544 assert(In && "Couldn't get element of initializer?"); 545 546 GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, 547 GlobalVariable::InternalLinkage, 548 In, GV->getName()+"."+utostr(i), 549 (Module *)NULL, 550 GV->isThreadLocal(), 551 GV->getType()->getAddressSpace()); 552 Globals.insert(GV, NGV); 553 NewGlobals.push_back(NGV); 554 555 // Calculate the known alignment of the field. If the original aggregate 556 // had 256 byte alignment for example, something might depend on that: 557 // propagate info to each field. 558 unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); 559 if (NewAlign > EltAlign) 560 NGV->setAlignment(NewAlign); 561 } 562 } 563 564 if (NewGlobals.empty()) 565 return 0; 566 567 DOUT << "PERFORMING GLOBAL SRA ON: " << *GV; 568 569 Constant *NullInt = Constant::getNullValue(Type::Int32Ty); 570 571 // Loop over all of the uses of the global, replacing the constantexpr geps, 572 // with smaller constantexpr geps or direct references. 573 while (!GV->use_empty()) { 574 User *GEP = GV->use_back(); 575 assert(((isa<ConstantExpr>(GEP) && 576 cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| 577 isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); 578 579 // Ignore the 1th operand, which has to be zero or else the program is quite 580 // broken (undefined). Get the 2nd operand, which is the structure or array 581 // index. 582 unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); 583 if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. 584 585 Value *NewPtr = NewGlobals[Val]; 586 587 // Form a shorter GEP if needed. 588 if (GEP->getNumOperands() > 3) { 589 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) { 590 SmallVector<Constant*, 8> Idxs; 591 Idxs.push_back(NullInt); 592 for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) 593 Idxs.push_back(CE->getOperand(i)); 594 NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), 595 &Idxs[0], Idxs.size()); 596 } else { 597 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); 598 SmallVector<Value*, 8> Idxs; 599 Idxs.push_back(NullInt); 600 for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) 601 Idxs.push_back(GEPI->getOperand(i)); 602 NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), 603 GEPI->getName()+"."+utostr(Val), GEPI); 604 } 605 } 606 GEP->replaceAllUsesWith(NewPtr); 607 608 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) 609 GEPI->eraseFromParent(); 610 else 611 cast<ConstantExpr>(GEP)->destroyConstant(); 612 } 613 614 // Delete the old global, now that it is dead. 615 Globals.erase(GV); 616 ++NumSRA; 617 618 // Loop over the new globals array deleting any globals that are obviously 619 // dead. This can arise due to scalarization of a structure or an array that 620 // has elements that are dead. 621 unsigned FirstGlobal = 0; 622 for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i) 623 if (NewGlobals[i]->use_empty()) { 624 Globals.erase(NewGlobals[i]); 625 if (FirstGlobal == i) ++FirstGlobal; 626 } 627 628 return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0; 629} 630 631/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified 632/// value will trap if the value is dynamically null. PHIs keeps track of any 633/// phi nodes we've seen to avoid reprocessing them. 634static bool AllUsesOfValueWillTrapIfNull(Value *V, 635 SmallPtrSet<PHINode*, 8> &PHIs) { 636 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) 637 if (isa<LoadInst>(*UI)) { 638 // Will trap. 639 } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { 640 if (SI->getOperand(0) == V) { 641 //cerr << "NONTRAPPING USE: " << **UI; 642 return false; // Storing the value. 643 } 644 } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { 645 if (CI->getOperand(0) != V) { 646 //cerr << "NONTRAPPING USE: " << **UI; 647 return false; // Not calling the ptr 648 } 649 } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { 650 if (II->getOperand(0) != V) { 651 //cerr << "NONTRAPPING USE: " << **UI; 652 return false; // Not calling the ptr 653 } 654 } else if (BitCastInst *CI = dyn_cast<BitCastInst>(*UI)) { 655 if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; 656 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) { 657 if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; 658 } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) { 659 // If we've already seen this phi node, ignore it, it has already been 660 // checked. 661 if (PHIs.insert(PN)) 662 return AllUsesOfValueWillTrapIfNull(PN, PHIs); 663 } else if (isa<ICmpInst>(*UI) && 664 isa<ConstantPointerNull>(UI->getOperand(1))) { 665 // Ignore setcc X, null 666 } else { 667 //cerr << "NONTRAPPING USE: " << **UI; 668 return false; 669 } 670 return true; 671} 672 673/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads 674/// from GV will trap if the loaded value is null. Note that this also permits 675/// comparisons of the loaded value against null, as a special case. 676static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { 677 for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI) 678 if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { 679 SmallPtrSet<PHINode*, 8> PHIs; 680 if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) 681 return false; 682 } else if (isa<StoreInst>(*UI)) { 683 // Ignore stores to the global. 684 } else { 685 // We don't know or understand this user, bail out. 686 //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI; 687 return false; 688 } 689 690 return true; 691} 692 693static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { 694 bool Changed = false; 695 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { 696 Instruction *I = cast<Instruction>(*UI++); 697 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 698 LI->setOperand(0, NewV); 699 Changed = true; 700 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 701 if (SI->getOperand(1) == V) { 702 SI->setOperand(1, NewV); 703 Changed = true; 704 } 705 } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { 706 if (I->getOperand(0) == V) { 707 // Calling through the pointer! Turn into a direct call, but be careful 708 // that the pointer is not also being passed as an argument. 709 I->setOperand(0, NewV); 710 Changed = true; 711 bool PassedAsArg = false; 712 for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) 713 if (I->getOperand(i) == V) { 714 PassedAsArg = true; 715 I->setOperand(i, NewV); 716 } 717 718 if (PassedAsArg) { 719 // Being passed as an argument also. Be careful to not invalidate UI! 720 UI = V->use_begin(); 721 } 722 } 723 } else if (CastInst *CI = dyn_cast<CastInst>(I)) { 724 Changed |= OptimizeAwayTrappingUsesOfValue(CI, 725 ConstantExpr::getCast(CI->getOpcode(), 726 NewV, CI->getType())); 727 if (CI->use_empty()) { 728 Changed = true; 729 CI->eraseFromParent(); 730 } 731 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { 732 // Should handle GEP here. 733 SmallVector<Constant*, 8> Idxs; 734 Idxs.reserve(GEPI->getNumOperands()-1); 735 for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end(); 736 i != e; ++i) 737 if (Constant *C = dyn_cast<Constant>(*i)) 738 Idxs.push_back(C); 739 else 740 break; 741 if (Idxs.size() == GEPI->getNumOperands()-1) 742 Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, 743 ConstantExpr::getGetElementPtr(NewV, &Idxs[0], 744 Idxs.size())); 745 if (GEPI->use_empty()) { 746 Changed = true; 747 GEPI->eraseFromParent(); 748 } 749 } 750 } 751 752 return Changed; 753} 754 755 756/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null 757/// value stored into it. If there are uses of the loaded value that would trap 758/// if the loaded value is dynamically null, then we know that they cannot be 759/// reachable with a null optimize away the load. 760static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { 761 bool Changed = false; 762 763 // Keep track of whether we are able to remove all the uses of the global 764 // other than the store that defines it. 765 bool AllNonStoreUsesGone = true; 766 767 // Replace all uses of loads with uses of uses of the stored value. 768 for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ 769 User *GlobalUser = *GUI++; 770 if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { 771 Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); 772 // If we were able to delete all uses of the loads 773 if (LI->use_empty()) { 774 LI->eraseFromParent(); 775 Changed = true; 776 } else { 777 AllNonStoreUsesGone = false; 778 } 779 } else if (isa<StoreInst>(GlobalUser)) { 780 // Ignore the store that stores "LV" to the global. 781 assert(GlobalUser->getOperand(1) == GV && 782 "Must be storing *to* the global"); 783 } else { 784 AllNonStoreUsesGone = false; 785 786 // If we get here we could have other crazy uses that are transitively 787 // loaded. 788 assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || 789 isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!"); 790 } 791 } 792 793 if (Changed) { 794 DOUT << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV; 795 ++NumGlobUses; 796 } 797 798 // If we nuked all of the loads, then none of the stores are needed either, 799 // nor is the global. 800 if (AllNonStoreUsesGone) { 801 DOUT << " *** GLOBAL NOW DEAD!\n"; 802 CleanupConstantGlobalUsers(GV, 0); 803 if (GV->use_empty()) { 804 GV->eraseFromParent(); 805 ++NumDeleted; 806 } 807 Changed = true; 808 } 809 return Changed; 810} 811 812/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the 813/// instructions that are foldable. 814static void ConstantPropUsersOf(Value *V) { 815 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) 816 if (Instruction *I = dyn_cast<Instruction>(*UI++)) 817 if (Constant *NewC = ConstantFoldInstruction(I)) { 818 I->replaceAllUsesWith(NewC); 819 820 // Advance UI to the next non-I use to avoid invalidating it! 821 // Instructions could multiply use V. 822 while (UI != E && *UI == I) 823 ++UI; 824 I->eraseFromParent(); 825 } 826} 827 828/// OptimizeGlobalAddressOfMalloc - This function takes the specified global 829/// variable, and transforms the program as if it always contained the result of 830/// the specified malloc. Because it is always the result of the specified 831/// malloc, there is no reason to actually DO the malloc. Instead, turn the 832/// malloc into a global, and any loads of GV as uses of the new global. 833static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, 834 MallocInst *MI) { 835 DOUT << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI; 836 ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize()); 837 838 if (NElements->getZExtValue() != 1) { 839 // If we have an array allocation, transform it to a single element 840 // allocation to make the code below simpler. 841 Type *NewTy = ArrayType::get(MI->getAllocatedType(), 842 NElements->getZExtValue()); 843 MallocInst *NewMI = 844 new MallocInst(NewTy, Constant::getNullValue(Type::Int32Ty), 845 MI->getAlignment(), MI->getName(), MI); 846 Value* Indices[2]; 847 Indices[0] = Indices[1] = Constant::getNullValue(Type::Int32Ty); 848 Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, 849 NewMI->getName()+".el0", MI); 850 MI->replaceAllUsesWith(NewGEP); 851 MI->eraseFromParent(); 852 MI = NewMI; 853 } 854 855 // Create the new global variable. The contents of the malloc'd memory is 856 // undefined, so initialize with an undef value. 857 Constant *Init = UndefValue::get(MI->getAllocatedType()); 858 GlobalVariable *NewGV = new GlobalVariable(MI->getAllocatedType(), false, 859 GlobalValue::InternalLinkage, Init, 860 GV->getName()+".body", 861 (Module *)NULL, 862 GV->isThreadLocal()); 863 // FIXME: This new global should have the alignment returned by malloc. Code 864 // could depend on malloc returning large alignment (on the mac, 16 bytes) but 865 // this would only guarantee some lower alignment. 866 GV->getParent()->getGlobalList().insert(GV, NewGV); 867 868 // Anything that used the malloc now uses the global directly. 869 MI->replaceAllUsesWith(NewGV); 870 871 Constant *RepValue = NewGV; 872 if (NewGV->getType() != GV->getType()->getElementType()) 873 RepValue = ConstantExpr::getBitCast(RepValue, 874 GV->getType()->getElementType()); 875 876 // If there is a comparison against null, we will insert a global bool to 877 // keep track of whether the global was initialized yet or not. 878 GlobalVariable *InitBool = 879 new GlobalVariable(Type::Int1Ty, false, GlobalValue::InternalLinkage, 880 ConstantInt::getFalse(), GV->getName()+".init", 881 (Module *)NULL, GV->isThreadLocal()); 882 bool InitBoolUsed = false; 883 884 // Loop over all uses of GV, processing them in turn. 885 std::vector<StoreInst*> Stores; 886 while (!GV->use_empty()) 887 if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { 888 while (!LI->use_empty()) { 889 Use &LoadUse = LI->use_begin().getUse(); 890 if (!isa<ICmpInst>(LoadUse.getUser())) 891 LoadUse = RepValue; 892 else { 893 ICmpInst *CI = cast<ICmpInst>(LoadUse.getUser()); 894 // Replace the cmp X, 0 with a use of the bool value. 895 Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI); 896 InitBoolUsed = true; 897 switch (CI->getPredicate()) { 898 default: assert(0 && "Unknown ICmp Predicate!"); 899 case ICmpInst::ICMP_ULT: 900 case ICmpInst::ICMP_SLT: 901 LV = ConstantInt::getFalse(); // X < null -> always false 902 break; 903 case ICmpInst::ICMP_ULE: 904 case ICmpInst::ICMP_SLE: 905 case ICmpInst::ICMP_EQ: 906 LV = BinaryOperator::CreateNot(LV, "notinit", CI); 907 break; 908 case ICmpInst::ICMP_NE: 909 case ICmpInst::ICMP_UGE: 910 case ICmpInst::ICMP_SGE: 911 case ICmpInst::ICMP_UGT: 912 case ICmpInst::ICMP_SGT: 913 break; // no change. 914 } 915 CI->replaceAllUsesWith(LV); 916 CI->eraseFromParent(); 917 } 918 } 919 LI->eraseFromParent(); 920 } else { 921 StoreInst *SI = cast<StoreInst>(GV->use_back()); 922 // The global is initialized when the store to it occurs. 923 new StoreInst(ConstantInt::getTrue(), InitBool, SI); 924 SI->eraseFromParent(); 925 } 926 927 // If the initialization boolean was used, insert it, otherwise delete it. 928 if (!InitBoolUsed) { 929 while (!InitBool->use_empty()) // Delete initializations 930 cast<Instruction>(InitBool->use_back())->eraseFromParent(); 931 delete InitBool; 932 } else 933 GV->getParent()->getGlobalList().insert(GV, InitBool); 934 935 936 // Now the GV is dead, nuke it and the malloc. 937 GV->eraseFromParent(); 938 MI->eraseFromParent(); 939 940 // To further other optimizations, loop over all users of NewGV and try to 941 // constant prop them. This will promote GEP instructions with constant 942 // indices into GEP constant-exprs, which will allow global-opt to hack on it. 943 ConstantPropUsersOf(NewGV); 944 if (RepValue != NewGV) 945 ConstantPropUsersOf(RepValue); 946 947 return NewGV; 948} 949 950/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking 951/// to make sure that there are no complex uses of V. We permit simple things 952/// like dereferencing the pointer, but not storing through the address, unless 953/// it is to the specified global. 954static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, 955 GlobalVariable *GV, 956 SmallPtrSet<PHINode*, 8> &PHIs) { 957 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 958 Instruction *Inst = dyn_cast<Instruction>(*UI); 959 if (Inst == 0) return false; 960 961 if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { 962 continue; // Fine, ignore. 963 } 964 965 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 966 if (SI->getOperand(0) == V && SI->getOperand(1) != GV) 967 return false; // Storing the pointer itself... bad. 968 continue; // Otherwise, storing through it, or storing into GV... fine. 969 } 970 971 if (isa<GetElementPtrInst>(Inst)) { 972 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) 973 return false; 974 continue; 975 } 976 977 if (PHINode *PN = dyn_cast<PHINode>(Inst)) { 978 // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI 979 // cycles. 980 if (PHIs.insert(PN)) 981 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)) 982 return false; 983 continue; 984 } 985 986 if (BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { 987 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) 988 return false; 989 continue; 990 } 991 992 return false; 993 } 994 return true; 995} 996 997/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV 998/// somewhere. Transform all uses of the allocation into loads from the 999/// global and uses of the resultant pointer. Further, delete the store into 1000/// GV. This assumes that these value pass the 1001/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. 1002static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, 1003 GlobalVariable *GV) { 1004 while (!Alloc->use_empty()) { 1005 Instruction *U = cast<Instruction>(*Alloc->use_begin()); 1006 Instruction *InsertPt = U; 1007 if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 1008 // If this is the store of the allocation into the global, remove it. 1009 if (SI->getOperand(1) == GV) { 1010 SI->eraseFromParent(); 1011 continue; 1012 } 1013 } else if (PHINode *PN = dyn_cast<PHINode>(U)) { 1014 // Insert the load in the corresponding predecessor, not right before the 1015 // PHI. 1016 InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator(); 1017 } else if (isa<BitCastInst>(U)) { 1018 // Must be bitcast between the malloc and store to initialize the global. 1019 ReplaceUsesOfMallocWithGlobal(U, GV); 1020 U->eraseFromParent(); 1021 continue; 1022 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { 1023 // If this is a "GEP bitcast" and the user is a store to the global, then 1024 // just process it as a bitcast. 1025 if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse()) 1026 if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back())) 1027 if (SI->getOperand(1) == GV) { 1028 // Must be bitcast GEP between the malloc and store to initialize 1029 // the global. 1030 ReplaceUsesOfMallocWithGlobal(GEPI, GV); 1031 GEPI->eraseFromParent(); 1032 continue; 1033 } 1034 } 1035 1036 // Insert a load from the global, and use it instead of the malloc. 1037 Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); 1038 U->replaceUsesOfWith(Alloc, NL); 1039 } 1040} 1041 1042/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi 1043/// of a load) are simple enough to perform heap SRA on. This permits GEP's 1044/// that index through the array and struct field, icmps of null, and PHIs. 1045static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, 1046 SmallPtrSet<PHINode*, 32> &LoadUsingPHIs) { 1047 // We permit two users of the load: setcc comparing against the null 1048 // pointer, and a getelementptr of a specific form. 1049 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 1050 Instruction *User = cast<Instruction>(*UI); 1051 1052 // Comparison against null is ok. 1053 if (ICmpInst *ICI = dyn_cast<ICmpInst>(User)) { 1054 if (!isa<ConstantPointerNull>(ICI->getOperand(1))) 1055 return false; 1056 continue; 1057 } 1058 1059 // getelementptr is also ok, but only a simple form. 1060 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { 1061 // Must index into the array and into the struct. 1062 if (GEPI->getNumOperands() < 3) 1063 return false; 1064 1065 // Otherwise the GEP is ok. 1066 continue; 1067 } 1068 1069 if (PHINode *PN = dyn_cast<PHINode>(User)) { 1070 // If we have already recursively analyzed this PHI, then it is safe. 1071 if (LoadUsingPHIs.insert(PN)) 1072 continue; 1073 1074 // Make sure all uses of the PHI are simple enough to transform. 1075 if (!LoadUsesSimpleEnoughForHeapSRA(PN, LoadUsingPHIs)) 1076 return false; 1077 1078 continue; 1079 } 1080 1081 // Otherwise we don't know what this is, not ok. 1082 return false; 1083 } 1084 1085 return true; 1086} 1087 1088 1089/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from 1090/// GV are simple enough to perform HeapSRA, return true. 1091static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, 1092 MallocInst *MI) { 1093 SmallPtrSet<PHINode*, 32> LoadUsingPHIs; 1094 for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; 1095 ++UI) 1096 if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) 1097 if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs)) 1098 return false; 1099 1100 // If we reach here, we know that all uses of the loads and transitive uses 1101 // (through PHI nodes) are simple enough to transform. However, we don't know 1102 // that all inputs the to the PHI nodes are in the same equivalence sets. 1103 // Check to verify that all operands of the PHIs are either PHIS that can be 1104 // transformed, loads from GV, or MI itself. 1105 for (SmallPtrSet<PHINode*, 32>::iterator I = LoadUsingPHIs.begin(), 1106 E = LoadUsingPHIs.end(); I != E; ++I) { 1107 PHINode *PN = *I; 1108 for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { 1109 Value *InVal = PN->getIncomingValue(op); 1110 1111 // PHI of the stored value itself is ok. 1112 if (InVal == MI) continue; 1113 1114 if (PHINode *InPN = dyn_cast<PHINode>(InVal)) { 1115 // One of the PHIs in our set is (optimistically) ok. 1116 if (LoadUsingPHIs.count(InPN)) 1117 continue; 1118 return false; 1119 } 1120 1121 // Load from GV is ok. 1122 if (LoadInst *LI = dyn_cast<LoadInst>(InVal)) 1123 if (LI->getOperand(0) == GV) 1124 continue; 1125 1126 // UNDEF? NULL? 1127 1128 // Anything else is rejected. 1129 return false; 1130 } 1131 } 1132 1133 return true; 1134} 1135 1136static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, 1137 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1138 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { 1139 std::vector<Value*> &FieldVals = InsertedScalarizedValues[V]; 1140 1141 if (FieldNo >= FieldVals.size()) 1142 FieldVals.resize(FieldNo+1); 1143 1144 // If we already have this value, just reuse the previously scalarized 1145 // version. 1146 if (Value *FieldVal = FieldVals[FieldNo]) 1147 return FieldVal; 1148 1149 // Depending on what instruction this is, we have several cases. 1150 Value *Result; 1151 if (LoadInst *LI = dyn_cast<LoadInst>(V)) { 1152 // This is a scalarized version of the load from the global. Just create 1153 // a new Load of the scalarized global. 1154 Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, 1155 InsertedScalarizedValues, 1156 PHIsToRewrite), 1157 LI->getName()+".f" + utostr(FieldNo), LI); 1158 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 1159 // PN's type is pointer to struct. Make a new PHI of pointer to struct 1160 // field. 1161 const StructType *ST = 1162 cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); 1163 1164 Result =PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), 1165 PN->getName()+".f"+utostr(FieldNo), PN); 1166 PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); 1167 } else { 1168 assert(0 && "Unknown usable value"); 1169 Result = 0; 1170 } 1171 1172 return FieldVals[FieldNo] = Result; 1173} 1174 1175/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from 1176/// the load, rewrite the derived value to use the HeapSRoA'd load. 1177static void RewriteHeapSROALoadUser(Instruction *LoadUser, 1178 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1179 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { 1180 // If this is a comparison against null, handle it. 1181 if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) { 1182 assert(isa<ConstantPointerNull>(SCI->getOperand(1))); 1183 // If we have a setcc of the loaded pointer, we can use a setcc of any 1184 // field. 1185 Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, 1186 InsertedScalarizedValues, PHIsToRewrite); 1187 1188 Value *New = new ICmpInst(SCI->getPredicate(), NPtr, 1189 Constant::getNullValue(NPtr->getType()), 1190 SCI->getName(), SCI); 1191 SCI->replaceAllUsesWith(New); 1192 SCI->eraseFromParent(); 1193 return; 1194 } 1195 1196 // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' 1197 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) { 1198 assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) 1199 && "Unexpected GEPI!"); 1200 1201 // Load the pointer for this field. 1202 unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); 1203 Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, 1204 InsertedScalarizedValues, PHIsToRewrite); 1205 1206 // Create the new GEP idx vector. 1207 SmallVector<Value*, 8> GEPIdx; 1208 GEPIdx.push_back(GEPI->getOperand(1)); 1209 GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); 1210 1211 Value *NGEPI = GetElementPtrInst::Create(NewPtr, 1212 GEPIdx.begin(), GEPIdx.end(), 1213 GEPI->getName(), GEPI); 1214 GEPI->replaceAllUsesWith(NGEPI); 1215 GEPI->eraseFromParent(); 1216 return; 1217 } 1218 1219 // Recursively transform the users of PHI nodes. This will lazily create the 1220 // PHIs that are needed for individual elements. Keep track of what PHIs we 1221 // see in InsertedScalarizedValues so that we don't get infinite loops (very 1222 // antisocial). If the PHI is already in InsertedScalarizedValues, it has 1223 // already been seen first by another load, so its uses have already been 1224 // processed. 1225 PHINode *PN = cast<PHINode>(LoadUser); 1226 bool Inserted; 1227 DenseMap<Value*, std::vector<Value*> >::iterator InsertPos; 1228 tie(InsertPos, Inserted) = 1229 InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>())); 1230 if (!Inserted) return; 1231 1232 // If this is the first time we've seen this PHI, recursively process all 1233 // users. 1234 for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { 1235 Instruction *User = cast<Instruction>(*UI++); 1236 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); 1237 } 1238} 1239 1240/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr 1241/// is a value loaded from the global. Eliminate all uses of Ptr, making them 1242/// use FieldGlobals instead. All uses of loaded values satisfy 1243/// AllGlobalLoadUsesSimpleEnoughForHeapSRA. 1244static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, 1245 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1246 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { 1247 for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); 1248 UI != E; ) { 1249 Instruction *User = cast<Instruction>(*UI++); 1250 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); 1251 } 1252 1253 if (Load->use_empty()) { 1254 Load->eraseFromParent(); 1255 InsertedScalarizedValues.erase(Load); 1256 } 1257} 1258 1259/// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break 1260/// it up into multiple allocations of arrays of the fields. 1261static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ 1262 DOUT << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI; 1263 const StructType *STy = cast<StructType>(MI->getAllocatedType()); 1264 1265 // There is guaranteed to be at least one use of the malloc (storing 1266 // it into GV). If there are other uses, change them to be uses of 1267 // the global to simplify later code. This also deletes the store 1268 // into GV. 1269 ReplaceUsesOfMallocWithGlobal(MI, GV); 1270 1271 // Okay, at this point, there are no users of the malloc. Insert N 1272 // new mallocs at the same place as MI, and N globals. 1273 std::vector<Value*> FieldGlobals; 1274 std::vector<MallocInst*> FieldMallocs; 1275 1276 for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ 1277 const Type *FieldTy = STy->getElementType(FieldNo); 1278 const Type *PFieldTy = PointerType::getUnqual(FieldTy); 1279 1280 GlobalVariable *NGV = 1281 new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage, 1282 Constant::getNullValue(PFieldTy), 1283 GV->getName() + ".f" + utostr(FieldNo), GV, 1284 GV->isThreadLocal()); 1285 FieldGlobals.push_back(NGV); 1286 1287 MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(), 1288 MI->getName() + ".f" + utostr(FieldNo),MI); 1289 FieldMallocs.push_back(NMI); 1290 new StoreInst(NMI, NGV, MI); 1291 } 1292 1293 // The tricky aspect of this transformation is handling the case when malloc 1294 // fails. In the original code, malloc failing would set the result pointer 1295 // of malloc to null. In this case, some mallocs could succeed and others 1296 // could fail. As such, we emit code that looks like this: 1297 // F0 = malloc(field0) 1298 // F1 = malloc(field1) 1299 // F2 = malloc(field2) 1300 // if (F0 == 0 || F1 == 0 || F2 == 0) { 1301 // if (F0) { free(F0); F0 = 0; } 1302 // if (F1) { free(F1); F1 = 0; } 1303 // if (F2) { free(F2); F2 = 0; } 1304 // } 1305 Value *RunningOr = 0; 1306 for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { 1307 Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, FieldMallocs[i], 1308 Constant::getNullValue(FieldMallocs[i]->getType()), 1309 "isnull", MI); 1310 if (!RunningOr) 1311 RunningOr = Cond; // First seteq 1312 else 1313 RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", MI); 1314 } 1315 1316 // Split the basic block at the old malloc. 1317 BasicBlock *OrigBB = MI->getParent(); 1318 BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont"); 1319 1320 // Create the block to check the first condition. Put all these blocks at the 1321 // end of the function as they are unlikely to be executed. 1322 BasicBlock *NullPtrBlock = BasicBlock::Create("malloc_ret_null", 1323 OrigBB->getParent()); 1324 1325 // Remove the uncond branch from OrigBB to ContBB, turning it into a cond 1326 // branch on RunningOr. 1327 OrigBB->getTerminator()->eraseFromParent(); 1328 BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); 1329 1330 // Within the NullPtrBlock, we need to emit a comparison and branch for each 1331 // pointer, because some may be null while others are not. 1332 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { 1333 Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); 1334 Value *Cmp = new ICmpInst(ICmpInst::ICMP_NE, GVVal, 1335 Constant::getNullValue(GVVal->getType()), 1336 "tmp", NullPtrBlock); 1337 BasicBlock *FreeBlock = BasicBlock::Create("free_it", OrigBB->getParent()); 1338 BasicBlock *NextBlock = BasicBlock::Create("next", OrigBB->getParent()); 1339 BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); 1340 1341 // Fill in FreeBlock. 1342 new FreeInst(GVVal, FreeBlock); 1343 new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], 1344 FreeBlock); 1345 BranchInst::Create(NextBlock, FreeBlock); 1346 1347 NullPtrBlock = NextBlock; 1348 } 1349 1350 BranchInst::Create(ContBB, NullPtrBlock); 1351 1352 // MI is no longer needed, remove it. 1353 MI->eraseFromParent(); 1354 1355 /// InsertedScalarizedLoads - As we process loads, if we can't immediately 1356 /// update all uses of the load, keep track of what scalarized loads are 1357 /// inserted for a given load. 1358 DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; 1359 InsertedScalarizedValues[GV] = FieldGlobals; 1360 1361 std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; 1362 1363 // Okay, the malloc site is completely handled. All of the uses of GV are now 1364 // loads, and all uses of those loads are simple. Rewrite them to use loads 1365 // of the per-field globals instead. 1366 for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { 1367 Instruction *User = cast<Instruction>(*UI++); 1368 1369 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1370 RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); 1371 continue; 1372 } 1373 1374 // Must be a store of null. 1375 StoreInst *SI = cast<StoreInst>(User); 1376 assert(isa<ConstantPointerNull>(SI->getOperand(0)) && 1377 "Unexpected heap-sra user!"); 1378 1379 // Insert a store of null into each global. 1380 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { 1381 const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); 1382 Constant *Null = Constant::getNullValue(PT->getElementType()); 1383 new StoreInst(Null, FieldGlobals[i], SI); 1384 } 1385 // Erase the original store. 1386 SI->eraseFromParent(); 1387 } 1388 1389 // While we have PHIs that are interesting to rewrite, do it. 1390 while (!PHIsToRewrite.empty()) { 1391 PHINode *PN = PHIsToRewrite.back().first; 1392 unsigned FieldNo = PHIsToRewrite.back().second; 1393 PHIsToRewrite.pop_back(); 1394 PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); 1395 assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); 1396 1397 // Add all the incoming values. This can materialize more phis. 1398 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1399 Value *InVal = PN->getIncomingValue(i); 1400 InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, 1401 PHIsToRewrite); 1402 FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); 1403 } 1404 } 1405 1406 // Drop all inter-phi links and any loads that made it this far. 1407 for (DenseMap<Value*, std::vector<Value*> >::iterator 1408 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); 1409 I != E; ++I) { 1410 if (PHINode *PN = dyn_cast<PHINode>(I->first)) 1411 PN->dropAllReferences(); 1412 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) 1413 LI->dropAllReferences(); 1414 } 1415 1416 // Delete all the phis and loads now that inter-references are dead. 1417 for (DenseMap<Value*, std::vector<Value*> >::iterator 1418 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); 1419 I != E; ++I) { 1420 if (PHINode *PN = dyn_cast<PHINode>(I->first)) 1421 PN->eraseFromParent(); 1422 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) 1423 LI->eraseFromParent(); 1424 } 1425 1426 // The old global is now dead, remove it. 1427 GV->eraseFromParent(); 1428 1429 ++NumHeapSRA; 1430 return cast<GlobalVariable>(FieldGlobals[0]); 1431} 1432 1433/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a 1434/// pointer global variable with a single value stored it that is a malloc or 1435/// cast of malloc. 1436static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, 1437 MallocInst *MI, 1438 Module::global_iterator &GVI, 1439 TargetData &TD) { 1440 // If this is a malloc of an abstract type, don't touch it. 1441 if (!MI->getAllocatedType()->isSized()) 1442 return false; 1443 1444 // We can't optimize this global unless all uses of it are *known* to be 1445 // of the malloc value, not of the null initializer value (consider a use 1446 // that compares the global's value against zero to see if the malloc has 1447 // been reached). To do this, we check to see if all uses of the global 1448 // would trap if the global were null: this proves that they must all 1449 // happen after the malloc. 1450 if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) 1451 return false; 1452 1453 // We can't optimize this if the malloc itself is used in a complex way, 1454 // for example, being stored into multiple globals. This allows the 1455 // malloc to be stored into the specified global, loaded setcc'd, and 1456 // GEP'd. These are all things we could transform to using the global 1457 // for. 1458 { 1459 SmallPtrSet<PHINode*, 8> PHIs; 1460 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV, PHIs)) 1461 return false; 1462 } 1463 1464 1465 // If we have a global that is only initialized with a fixed size malloc, 1466 // transform the program to use global memory instead of malloc'd memory. 1467 // This eliminates dynamic allocation, avoids an indirection accessing the 1468 // data, and exposes the resultant global to further GlobalOpt. 1469 if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) { 1470 // Restrict this transformation to only working on small allocations 1471 // (2048 bytes currently), as we don't want to introduce a 16M global or 1472 // something. 1473 if (NElements->getZExtValue()* 1474 TD.getTypePaddedSize(MI->getAllocatedType()) < 2048) { 1475 GVI = OptimizeGlobalAddressOfMalloc(GV, MI); 1476 return true; 1477 } 1478 } 1479 1480 // If the allocation is an array of structures, consider transforming this 1481 // into multiple malloc'd arrays, one for each field. This is basically 1482 // SRoA for malloc'd memory. 1483 const Type *AllocTy = MI->getAllocatedType(); 1484 1485 // If this is an allocation of a fixed size array of structs, analyze as a 1486 // variable size array. malloc [100 x struct],1 -> malloc struct, 100 1487 if (!MI->isArrayAllocation()) 1488 if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) 1489 AllocTy = AT->getElementType(); 1490 1491 if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { 1492 // This the structure has an unreasonable number of fields, leave it 1493 // alone. 1494 if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && 1495 AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) { 1496 1497 // If this is a fixed size array, transform the Malloc to be an alloc of 1498 // structs. malloc [100 x struct],1 -> malloc struct, 100 1499 if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) { 1500 MallocInst *NewMI = 1501 new MallocInst(AllocSTy, 1502 ConstantInt::get(Type::Int32Ty, AT->getNumElements()), 1503 "", MI); 1504 NewMI->takeName(MI); 1505 Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI); 1506 MI->replaceAllUsesWith(Cast); 1507 MI->eraseFromParent(); 1508 MI = NewMI; 1509 } 1510 1511 GVI = PerformHeapAllocSRoA(GV, MI); 1512 return true; 1513 } 1514 } 1515 1516 return false; 1517} 1518 1519// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge 1520// that only one value (besides its initializer) is ever stored to the global. 1521static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, 1522 Module::global_iterator &GVI, 1523 TargetData &TD) { 1524 // Ignore no-op GEPs and bitcasts. 1525 StoredOnceVal = StoredOnceVal->stripPointerCasts(); 1526 1527 // If we are dealing with a pointer global that is initialized to null and 1528 // only has one (non-null) value stored into it, then we can optimize any 1529 // users of the loaded value (often calls and loads) that would trap if the 1530 // value was null. 1531 if (isa<PointerType>(GV->getInitializer()->getType()) && 1532 GV->getInitializer()->isNullValue()) { 1533 if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { 1534 if (GV->getInitializer()->getType() != SOVC->getType()) 1535 SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); 1536 1537 // Optimize away any trapping uses of the loaded value. 1538 if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) 1539 return true; 1540 } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) { 1541 if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD)) 1542 return true; 1543 } 1544 } 1545 1546 return false; 1547} 1548 1549/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only 1550/// two values ever stored into GV are its initializer and OtherVal. See if we 1551/// can shrink the global into a boolean and select between the two values 1552/// whenever it is used. This exposes the values to other scalar optimizations. 1553static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { 1554 const Type *GVElType = GV->getType()->getElementType(); 1555 1556 // If GVElType is already i1, it is already shrunk. If the type of the GV is 1557 // an FP value or vector, don't do this optimization because a select between 1558 // them is very expensive and unlikely to lead to later simplification. 1559 if (GVElType == Type::Int1Ty || GVElType->isFloatingPoint() || 1560 isa<VectorType>(GVElType)) 1561 return false; 1562 1563 // Walk the use list of the global seeing if all the uses are load or store. 1564 // If there is anything else, bail out. 1565 for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I) 1566 if (!isa<LoadInst>(I) && !isa<StoreInst>(I) && !UserIsDebugInfo(*I)) 1567 return false; 1568 1569 DOUT << " *** SHRINKING TO BOOL: " << *GV; 1570 1571 // Create the new global, initializing it to false. 1572 GlobalVariable *NewGV = new GlobalVariable(Type::Int1Ty, false, 1573 GlobalValue::InternalLinkage, ConstantInt::getFalse(), 1574 GV->getName()+".b", 1575 (Module *)NULL, 1576 GV->isThreadLocal()); 1577 GV->getParent()->getGlobalList().insert(GV, NewGV); 1578 1579 Constant *InitVal = GV->getInitializer(); 1580 assert(InitVal->getType() != Type::Int1Ty && "No reason to shrink to bool!"); 1581 1582 // If initialized to zero and storing one into the global, we can use a cast 1583 // instead of a select to synthesize the desired value. 1584 bool IsOneZero = false; 1585 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) 1586 IsOneZero = InitVal->isNullValue() && CI->isOne(); 1587 1588 while (!GV->use_empty()) { 1589 User *GVU = GV->use_back(); 1590 if (StoreInst *SI = dyn_cast<StoreInst>(GVU)) { 1591 // Change the store into a boolean store. 1592 bool StoringOther = SI->getOperand(0) == OtherVal; 1593 // Only do this if we weren't storing a loaded value. 1594 Value *StoreVal; 1595 if (StoringOther || SI->getOperand(0) == InitVal) 1596 StoreVal = ConstantInt::get(Type::Int1Ty, StoringOther); 1597 else { 1598 // Otherwise, we are storing a previously loaded copy. To do this, 1599 // change the copy from copying the original value to just copying the 1600 // bool. 1601 Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); 1602 1603 // If we're already replaced the input, StoredVal will be a cast or 1604 // select instruction. If not, it will be a load of the original 1605 // global. 1606 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { 1607 assert(LI->getOperand(0) == GV && "Not a copy!"); 1608 // Insert a new load, to preserve the saved value. 1609 StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI); 1610 } else { 1611 assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && 1612 "This is not a form that we understand!"); 1613 StoreVal = StoredVal->getOperand(0); 1614 assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); 1615 } 1616 } 1617 new StoreInst(StoreVal, NewGV, SI); 1618 SI->eraseFromParent(); 1619 } else if (LoadInst *LI = dyn_cast<LoadInst>(GVU)) { 1620 // Change the load into a load of bool then a select. 1621 LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI); 1622 Value *NSI; 1623 if (IsOneZero) 1624 NSI = new ZExtInst(NLI, LI->getType(), "", LI); 1625 else 1626 NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI); 1627 NSI->takeName(LI); 1628 LI->replaceAllUsesWith(NSI); 1629 LI->eraseFromParent(); 1630 } else 1631 RemoveDbgInfoUser(GVU); 1632 } 1633 1634 GV->eraseFromParent(); 1635 return true; 1636} 1637 1638 1639/// ProcessInternalGlobal - Analyze the specified global variable and optimize 1640/// it if possible. If we make a change, return true. 1641bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, 1642 Module::global_iterator &GVI) { 1643 SmallPtrSet<PHINode*, 16> PHIUsers; 1644 GlobalStatus GS; 1645 GV->removeDeadConstantUsers(); 1646 1647 if (GV->use_empty()) { 1648 DOUT << "GLOBAL DEAD: " << *GV; 1649 GV->eraseFromParent(); 1650 ++NumDeleted; 1651 return true; 1652 } 1653 1654 if (!AnalyzeGlobal(GV, GS, PHIUsers)) { 1655#if 0 1656 cerr << "Global: " << *GV; 1657 cerr << " isLoaded = " << GS.isLoaded << "\n"; 1658 cerr << " StoredType = "; 1659 switch (GS.StoredType) { 1660 case GlobalStatus::NotStored: cerr << "NEVER STORED\n"; break; 1661 case GlobalStatus::isInitializerStored: cerr << "INIT STORED\n"; break; 1662 case GlobalStatus::isStoredOnce: cerr << "STORED ONCE\n"; break; 1663 case GlobalStatus::isStored: cerr << "stored\n"; break; 1664 } 1665 if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) 1666 cerr << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"; 1667 if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) 1668 cerr << " AccessingFunction = " << GS.AccessingFunction->getName() 1669 << "\n"; 1670 cerr << " HasMultipleAccessingFunctions = " 1671 << GS.HasMultipleAccessingFunctions << "\n"; 1672 cerr << " HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n"; 1673 cerr << "\n"; 1674#endif 1675 1676 // If this is a first class global and has only one accessing function 1677 // and this function is main (which we know is not recursive we can make 1678 // this global a local variable) we replace the global with a local alloca 1679 // in this function. 1680 // 1681 // NOTE: It doesn't make sense to promote non single-value types since we 1682 // are just replacing static memory to stack memory. 1683 if (!GS.HasMultipleAccessingFunctions && 1684 GS.AccessingFunction && !GS.HasNonInstructionUser && 1685 GV->getType()->getElementType()->isSingleValueType() && 1686 GS.AccessingFunction->getName() == "main" && 1687 GS.AccessingFunction->hasExternalLinkage()) { 1688 DOUT << "LOCALIZING GLOBAL: " << *GV; 1689 Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin(); 1690 const Type* ElemTy = GV->getType()->getElementType(); 1691 // FIXME: Pass Global's alignment when globals have alignment 1692 AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), FirstI); 1693 if (!isa<UndefValue>(GV->getInitializer())) 1694 new StoreInst(GV->getInitializer(), Alloca, FirstI); 1695 1696 GV->replaceAllUsesWith(Alloca); 1697 GV->eraseFromParent(); 1698 ++NumLocalized; 1699 return true; 1700 } 1701 1702 // If the global is never loaded (but may be stored to), it is dead. 1703 // Delete it now. 1704 if (!GS.isLoaded) { 1705 DOUT << "GLOBAL NEVER LOADED: " << *GV; 1706 1707 // Delete any stores we can find to the global. We may not be able to 1708 // make it completely dead though. 1709 bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); 1710 1711 // If the global is dead now, delete it. 1712 if (GV->use_empty()) { 1713 GV->eraseFromParent(); 1714 ++NumDeleted; 1715 Changed = true; 1716 } 1717 return Changed; 1718 1719 } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { 1720 DOUT << "MARKING CONSTANT: " << *GV; 1721 GV->setConstant(true); 1722 1723 // Clean up any obviously simplifiable users now. 1724 CleanupConstantGlobalUsers(GV, GV->getInitializer()); 1725 1726 // If the global is dead now, just nuke it. 1727 if (GV->use_empty()) { 1728 DOUT << " *** Marking constant allowed us to simplify " 1729 << "all users and delete global!\n"; 1730 GV->eraseFromParent(); 1731 ++NumDeleted; 1732 } 1733 1734 ++NumMarked; 1735 return true; 1736 } else if (!GV->getInitializer()->getType()->isSingleValueType()) { 1737 if (GlobalVariable *FirstNewGV = SRAGlobal(GV, 1738 getAnalysis<TargetData>())) { 1739 GVI = FirstNewGV; // Don't skip the newly produced globals! 1740 return true; 1741 } 1742 } else if (GS.StoredType == GlobalStatus::isStoredOnce) { 1743 // If the initial value for the global was an undef value, and if only 1744 // one other value was stored into it, we can just change the 1745 // initializer to be the stored value, then delete all stores to the 1746 // global. This allows us to mark it constant. 1747 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) 1748 if (isa<UndefValue>(GV->getInitializer())) { 1749 // Change the initial value here. 1750 GV->setInitializer(SOVConstant); 1751 1752 // Clean up any obviously simplifiable users now. 1753 CleanupConstantGlobalUsers(GV, GV->getInitializer()); 1754 1755 if (GV->use_empty()) { 1756 DOUT << " *** Substituting initializer allowed us to " 1757 << "simplify all users and delete global!\n"; 1758 GV->eraseFromParent(); 1759 ++NumDeleted; 1760 } else { 1761 GVI = GV; 1762 } 1763 ++NumSubstitute; 1764 return true; 1765 } 1766 1767 // Try to optimize globals based on the knowledge that only one value 1768 // (besides its initializer) is ever stored to the global. 1769 if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, 1770 getAnalysis<TargetData>())) 1771 return true; 1772 1773 // Otherwise, if the global was not a boolean, we can shrink it to be a 1774 // boolean. 1775 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) 1776 if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { 1777 ++NumShrunkToBool; 1778 return true; 1779 } 1780 } 1781 } 1782 return false; 1783} 1784 1785/// OnlyCalledDirectly - Return true if the specified function is only called 1786/// directly. In other words, its address is never taken. 1787static bool OnlyCalledDirectly(Function *F) { 1788 for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ 1789 Instruction *User = dyn_cast<Instruction>(*UI); 1790 if (!User) return false; 1791 if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false; 1792 1793 // See if the function address is passed as an argument. 1794 for (User::op_iterator i = User->op_begin() + 1, e = User->op_end(); 1795 i != e; ++i) 1796 if (*i == F) return false; 1797 } 1798 return true; 1799} 1800 1801/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified 1802/// function, changing them to FastCC. 1803static void ChangeCalleesToFastCall(Function *F) { 1804 for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ 1805 CallSite User(cast<Instruction>(*UI)); 1806 User.setCallingConv(CallingConv::Fast); 1807 } 1808} 1809 1810static AttrListPtr StripNest(const AttrListPtr &Attrs) { 1811 for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { 1812 if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0) 1813 continue; 1814 1815 // There can be only one. 1816 return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest); 1817 } 1818 1819 return Attrs; 1820} 1821 1822static void RemoveNestAttribute(Function *F) { 1823 F->setAttributes(StripNest(F->getAttributes())); 1824 for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ 1825 CallSite User(cast<Instruction>(*UI)); 1826 User.setAttributes(StripNest(User.getAttributes())); 1827 } 1828} 1829 1830bool GlobalOpt::OptimizeFunctions(Module &M) { 1831 bool Changed = false; 1832 // Optimize functions. 1833 for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { 1834 Function *F = FI++; 1835 F->removeDeadConstantUsers(); 1836 if (F->use_empty() && (F->hasLocalLinkage() || 1837 F->hasLinkOnceLinkage())) { 1838 M.getFunctionList().erase(F); 1839 Changed = true; 1840 ++NumFnDeleted; 1841 } else if (F->hasLocalLinkage()) { 1842 if (F->getCallingConv() == CallingConv::C && !F->isVarArg() && 1843 OnlyCalledDirectly(F)) { 1844 // If this function has C calling conventions, is not a varargs 1845 // function, and is only called directly, promote it to use the Fast 1846 // calling convention. 1847 F->setCallingConv(CallingConv::Fast); 1848 ChangeCalleesToFastCall(F); 1849 ++NumFastCallFns; 1850 Changed = true; 1851 } 1852 1853 if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && 1854 OnlyCalledDirectly(F)) { 1855 // The function is not used by a trampoline intrinsic, so it is safe 1856 // to remove the 'nest' attribute. 1857 RemoveNestAttribute(F); 1858 ++NumNestRemoved; 1859 Changed = true; 1860 } 1861 } 1862 } 1863 return Changed; 1864} 1865 1866bool GlobalOpt::OptimizeGlobalVars(Module &M) { 1867 bool Changed = false; 1868 for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); 1869 GVI != E; ) { 1870 GlobalVariable *GV = GVI++; 1871 if (!GV->isConstant() && GV->hasLocalLinkage() && 1872 GV->hasInitializer()) 1873 Changed |= ProcessInternalGlobal(GV, GVI); 1874 } 1875 return Changed; 1876} 1877 1878/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all 1879/// initializers have an init priority of 65535. 1880GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { 1881 for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 1882 I != E; ++I) 1883 if (I->getName() == "llvm.global_ctors") { 1884 // Found it, verify it's an array of { int, void()* }. 1885 const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType()); 1886 if (!ATy) return 0; 1887 const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); 1888 if (!STy || STy->getNumElements() != 2 || 1889 STy->getElementType(0) != Type::Int32Ty) return 0; 1890 const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); 1891 if (!PFTy) return 0; 1892 const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); 1893 if (!FTy || FTy->getReturnType() != Type::VoidTy || FTy->isVarArg() || 1894 FTy->getNumParams() != 0) 1895 return 0; 1896 1897 // Verify that the initializer is simple enough for us to handle. 1898 if (!I->hasInitializer()) return 0; 1899 ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer()); 1900 if (!CA) return 0; 1901 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) 1902 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) { 1903 if (isa<ConstantPointerNull>(CS->getOperand(1))) 1904 continue; 1905 1906 // Must have a function or null ptr. 1907 if (!isa<Function>(CS->getOperand(1))) 1908 return 0; 1909 1910 // Init priority must be standard. 1911 ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); 1912 if (!CI || CI->getZExtValue() != 65535) 1913 return 0; 1914 } else { 1915 return 0; 1916 } 1917 1918 return I; 1919 } 1920 return 0; 1921} 1922 1923/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, 1924/// return a list of the functions and null terminator as a vector. 1925static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { 1926 ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); 1927 std::vector<Function*> Result; 1928 Result.reserve(CA->getNumOperands()); 1929 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { 1930 ConstantStruct *CS = cast<ConstantStruct>(*i); 1931 Result.push_back(dyn_cast<Function>(CS->getOperand(1))); 1932 } 1933 return Result; 1934} 1935 1936/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the 1937/// specified array, returning the new global to use. 1938static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, 1939 const std::vector<Function*> &Ctors) { 1940 // If we made a change, reassemble the initializer list. 1941 std::vector<Constant*> CSVals; 1942 CSVals.push_back(ConstantInt::get(Type::Int32Ty, 65535)); 1943 CSVals.push_back(0); 1944 1945 // Create the new init list. 1946 std::vector<Constant*> CAList; 1947 for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { 1948 if (Ctors[i]) { 1949 CSVals[1] = Ctors[i]; 1950 } else { 1951 const Type *FTy = FunctionType::get(Type::VoidTy, 1952 std::vector<const Type*>(), false); 1953 const PointerType *PFTy = PointerType::getUnqual(FTy); 1954 CSVals[1] = Constant::getNullValue(PFTy); 1955 CSVals[0] = ConstantInt::get(Type::Int32Ty, 2147483647); 1956 } 1957 CAList.push_back(ConstantStruct::get(CSVals)); 1958 } 1959 1960 // Create the array initializer. 1961 const Type *StructTy = 1962 cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); 1963 Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), 1964 CAList); 1965 1966 // If we didn't change the number of elements, don't create a new GV. 1967 if (CA->getType() == GCL->getInitializer()->getType()) { 1968 GCL->setInitializer(CA); 1969 return GCL; 1970 } 1971 1972 // Create the new global and insert it next to the existing list. 1973 GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), 1974 GCL->getLinkage(), CA, "", 1975 (Module *)NULL, 1976 GCL->isThreadLocal()); 1977 GCL->getParent()->getGlobalList().insert(GCL, NGV); 1978 NGV->takeName(GCL); 1979 1980 // Nuke the old list, replacing any uses with the new one. 1981 if (!GCL->use_empty()) { 1982 Constant *V = NGV; 1983 if (V->getType() != GCL->getType()) 1984 V = ConstantExpr::getBitCast(V, GCL->getType()); 1985 GCL->replaceAllUsesWith(V); 1986 } 1987 GCL->eraseFromParent(); 1988 1989 if (Ctors.size()) 1990 return NGV; 1991 else 1992 return 0; 1993} 1994 1995 1996static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, 1997 Value *V) { 1998 if (Constant *CV = dyn_cast<Constant>(V)) return CV; 1999 Constant *R = ComputedValues[V]; 2000 assert(R && "Reference to an uncomputed value!"); 2001 return R; 2002} 2003 2004/// isSimpleEnoughPointerToCommit - Return true if this constant is simple 2005/// enough for us to understand. In particular, if it is a cast of something, 2006/// we punt. We basically just support direct accesses to globals and GEP's of 2007/// globals. This should be kept up to date with CommitValueTo. 2008static bool isSimpleEnoughPointerToCommit(Constant *C) { 2009 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 2010 if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage()) 2011 return false; // do not allow weak/linkonce/dllimport/dllexport linkage. 2012 return !GV->isDeclaration(); // reject external globals. 2013 } 2014 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) 2015 // Handle a constantexpr gep. 2016 if (CE->getOpcode() == Instruction::GetElementPtr && 2017 isa<GlobalVariable>(CE->getOperand(0))) { 2018 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2019 if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage()) 2020 return false; // do not allow weak/linkonce/dllimport/dllexport linkage. 2021 return GV->hasInitializer() && 2022 ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); 2023 } 2024 return false; 2025} 2026 2027/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global 2028/// initializer. This returns 'Init' modified to reflect 'Val' stored into it. 2029/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. 2030static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, 2031 ConstantExpr *Addr, unsigned OpNo) { 2032 // Base case of the recursion. 2033 if (OpNo == Addr->getNumOperands()) { 2034 assert(Val->getType() == Init->getType() && "Type mismatch!"); 2035 return Val; 2036 } 2037 2038 if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { 2039 std::vector<Constant*> Elts; 2040 2041 // Break up the constant into its elements. 2042 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { 2043 for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i) 2044 Elts.push_back(cast<Constant>(*i)); 2045 } else if (isa<ConstantAggregateZero>(Init)) { 2046 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) 2047 Elts.push_back(Constant::getNullValue(STy->getElementType(i))); 2048 } else if (isa<UndefValue>(Init)) { 2049 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) 2050 Elts.push_back(UndefValue::get(STy->getElementType(i))); 2051 } else { 2052 assert(0 && "This code is out of sync with " 2053 " ConstantFoldLoadThroughGEPConstantExpr"); 2054 } 2055 2056 // Replace the element that we are supposed to. 2057 ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); 2058 unsigned Idx = CU->getZExtValue(); 2059 assert(Idx < STy->getNumElements() && "Struct index out of range!"); 2060 Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); 2061 2062 // Return the modified struct. 2063 return ConstantStruct::get(&Elts[0], Elts.size(), STy->isPacked()); 2064 } else { 2065 ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); 2066 const ArrayType *ATy = cast<ArrayType>(Init->getType()); 2067 2068 // Break up the array into elements. 2069 std::vector<Constant*> Elts; 2070 if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { 2071 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) 2072 Elts.push_back(cast<Constant>(*i)); 2073 } else if (isa<ConstantAggregateZero>(Init)) { 2074 Constant *Elt = Constant::getNullValue(ATy->getElementType()); 2075 Elts.assign(ATy->getNumElements(), Elt); 2076 } else if (isa<UndefValue>(Init)) { 2077 Constant *Elt = UndefValue::get(ATy->getElementType()); 2078 Elts.assign(ATy->getNumElements(), Elt); 2079 } else { 2080 assert(0 && "This code is out of sync with " 2081 " ConstantFoldLoadThroughGEPConstantExpr"); 2082 } 2083 2084 assert(CI->getZExtValue() < ATy->getNumElements()); 2085 Elts[CI->getZExtValue()] = 2086 EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); 2087 return ConstantArray::get(ATy, Elts); 2088 } 2089} 2090 2091/// CommitValueTo - We have decided that Addr (which satisfies the predicate 2092/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. 2093static void CommitValueTo(Constant *Val, Constant *Addr) { 2094 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 2095 assert(GV->hasInitializer()); 2096 GV->setInitializer(Val); 2097 return; 2098 } 2099 2100 ConstantExpr *CE = cast<ConstantExpr>(Addr); 2101 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2102 2103 Constant *Init = GV->getInitializer(); 2104 Init = EvaluateStoreInto(Init, Val, CE, 2); 2105 GV->setInitializer(Init); 2106} 2107 2108/// ComputeLoadResult - Return the value that would be computed by a load from 2109/// P after the stores reflected by 'memory' have been performed. If we can't 2110/// decide, return null. 2111static Constant *ComputeLoadResult(Constant *P, 2112 const DenseMap<Constant*, Constant*> &Memory) { 2113 // If this memory location has been recently stored, use the stored value: it 2114 // is the most up-to-date. 2115 DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); 2116 if (I != Memory.end()) return I->second; 2117 2118 // Access it. 2119 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { 2120 if (GV->hasInitializer()) 2121 return GV->getInitializer(); 2122 return 0; 2123 } 2124 2125 // Handle a constantexpr getelementptr. 2126 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) 2127 if (CE->getOpcode() == Instruction::GetElementPtr && 2128 isa<GlobalVariable>(CE->getOperand(0))) { 2129 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2130 if (GV->hasInitializer()) 2131 return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); 2132 } 2133 2134 return 0; // don't know how to evaluate. 2135} 2136 2137/// EvaluateFunction - Evaluate a call to function F, returning true if 2138/// successful, false if we can't evaluate it. ActualArgs contains the formal 2139/// arguments for the function. 2140static bool EvaluateFunction(Function *F, Constant *&RetVal, 2141 const std::vector<Constant*> &ActualArgs, 2142 std::vector<Function*> &CallStack, 2143 DenseMap<Constant*, Constant*> &MutatedMemory, 2144 std::vector<GlobalVariable*> &AllocaTmps) { 2145 // Check to see if this function is already executing (recursion). If so, 2146 // bail out. TODO: we might want to accept limited recursion. 2147 if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) 2148 return false; 2149 2150 CallStack.push_back(F); 2151 2152 /// Values - As we compute SSA register values, we store their contents here. 2153 DenseMap<Value*, Constant*> Values; 2154 2155 // Initialize arguments to the incoming values specified. 2156 unsigned ArgNo = 0; 2157 for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; 2158 ++AI, ++ArgNo) 2159 Values[AI] = ActualArgs[ArgNo]; 2160 2161 /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such, 2162 /// we can only evaluate any one basic block at most once. This set keeps 2163 /// track of what we have executed so we can detect recursive cases etc. 2164 SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; 2165 2166 // CurInst - The current instruction we're evaluating. 2167 BasicBlock::iterator CurInst = F->begin()->begin(); 2168 2169 // This is the main evaluation loop. 2170 while (1) { 2171 Constant *InstResult = 0; 2172 2173 if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { 2174 if (SI->isVolatile()) return false; // no volatile accesses. 2175 Constant *Ptr = getVal(Values, SI->getOperand(1)); 2176 if (!isSimpleEnoughPointerToCommit(Ptr)) 2177 // If this is too complex for us to commit, reject it. 2178 return false; 2179 Constant *Val = getVal(Values, SI->getOperand(0)); 2180 MutatedMemory[Ptr] = Val; 2181 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { 2182 InstResult = ConstantExpr::get(BO->getOpcode(), 2183 getVal(Values, BO->getOperand(0)), 2184 getVal(Values, BO->getOperand(1))); 2185 } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { 2186 InstResult = ConstantExpr::getCompare(CI->getPredicate(), 2187 getVal(Values, CI->getOperand(0)), 2188 getVal(Values, CI->getOperand(1))); 2189 } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { 2190 InstResult = ConstantExpr::getCast(CI->getOpcode(), 2191 getVal(Values, CI->getOperand(0)), 2192 CI->getType()); 2193 } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { 2194 InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), 2195 getVal(Values, SI->getOperand(1)), 2196 getVal(Values, SI->getOperand(2))); 2197 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { 2198 Constant *P = getVal(Values, GEP->getOperand(0)); 2199 SmallVector<Constant*, 8> GEPOps; 2200 for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); 2201 i != e; ++i) 2202 GEPOps.push_back(getVal(Values, *i)); 2203 InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); 2204 } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { 2205 if (LI->isVolatile()) return false; // no volatile accesses. 2206 InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), 2207 MutatedMemory); 2208 if (InstResult == 0) return false; // Could not evaluate load. 2209 } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { 2210 if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. 2211 const Type *Ty = AI->getType()->getElementType(); 2212 AllocaTmps.push_back(new GlobalVariable(Ty, false, 2213 GlobalValue::InternalLinkage, 2214 UndefValue::get(Ty), 2215 AI->getName())); 2216 InstResult = AllocaTmps.back(); 2217 } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) { 2218 // Cannot handle inline asm. 2219 if (isa<InlineAsm>(CI->getOperand(0))) return false; 2220 2221 // Resolve function pointers. 2222 Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0))); 2223 if (!Callee) return false; // Cannot resolve. 2224 2225 std::vector<Constant*> Formals; 2226 for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); 2227 i != e; ++i) 2228 Formals.push_back(getVal(Values, *i)); 2229 2230 if (Callee->isDeclaration()) { 2231 // If this is a function we can constant fold, do it. 2232 if (Constant *C = ConstantFoldCall(Callee, &Formals[0], 2233 Formals.size())) { 2234 InstResult = C; 2235 } else { 2236 return false; 2237 } 2238 } else { 2239 if (Callee->getFunctionType()->isVarArg()) 2240 return false; 2241 2242 Constant *RetVal; 2243 2244 // Execute the call, if successful, use the return value. 2245 if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, 2246 MutatedMemory, AllocaTmps)) 2247 return false; 2248 InstResult = RetVal; 2249 } 2250 } else if (isa<TerminatorInst>(CurInst)) { 2251 BasicBlock *NewBB = 0; 2252 if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { 2253 if (BI->isUnconditional()) { 2254 NewBB = BI->getSuccessor(0); 2255 } else { 2256 ConstantInt *Cond = 2257 dyn_cast<ConstantInt>(getVal(Values, BI->getCondition())); 2258 if (!Cond) return false; // Cannot determine. 2259 2260 NewBB = BI->getSuccessor(!Cond->getZExtValue()); 2261 } 2262 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { 2263 ConstantInt *Val = 2264 dyn_cast<ConstantInt>(getVal(Values, SI->getCondition())); 2265 if (!Val) return false; // Cannot determine. 2266 NewBB = SI->getSuccessor(SI->findCaseValue(Val)); 2267 } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) { 2268 if (RI->getNumOperands()) 2269 RetVal = getVal(Values, RI->getOperand(0)); 2270 2271 CallStack.pop_back(); // return from fn. 2272 return true; // We succeeded at evaluating this ctor! 2273 } else { 2274 // invoke, unwind, unreachable. 2275 return false; // Cannot handle this terminator. 2276 } 2277 2278 // Okay, we succeeded in evaluating this control flow. See if we have 2279 // executed the new block before. If so, we have a looping function, 2280 // which we cannot evaluate in reasonable time. 2281 if (!ExecutedBlocks.insert(NewBB)) 2282 return false; // looped! 2283 2284 // Okay, we have never been in this block before. Check to see if there 2285 // are any PHI nodes. If so, evaluate them with information about where 2286 // we came from. 2287 BasicBlock *OldBB = CurInst->getParent(); 2288 CurInst = NewBB->begin(); 2289 PHINode *PN; 2290 for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) 2291 Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB)); 2292 2293 // Do NOT increment CurInst. We know that the terminator had no value. 2294 continue; 2295 } else { 2296 // Did not know how to evaluate this! 2297 return false; 2298 } 2299 2300 if (!CurInst->use_empty()) 2301 Values[CurInst] = InstResult; 2302 2303 // Advance program counter. 2304 ++CurInst; 2305 } 2306} 2307 2308/// EvaluateStaticConstructor - Evaluate static constructors in the function, if 2309/// we can. Return true if we can, false otherwise. 2310static bool EvaluateStaticConstructor(Function *F) { 2311 /// MutatedMemory - For each store we execute, we update this map. Loads 2312 /// check this to get the most up-to-date value. If evaluation is successful, 2313 /// this state is committed to the process. 2314 DenseMap<Constant*, Constant*> MutatedMemory; 2315 2316 /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable 2317 /// to represent its body. This vector is needed so we can delete the 2318 /// temporary globals when we are done. 2319 std::vector<GlobalVariable*> AllocaTmps; 2320 2321 /// CallStack - This is used to detect recursion. In pathological situations 2322 /// we could hit exponential behavior, but at least there is nothing 2323 /// unbounded. 2324 std::vector<Function*> CallStack; 2325 2326 // Call the function. 2327 Constant *RetValDummy; 2328 bool EvalSuccess = EvaluateFunction(F, RetValDummy, std::vector<Constant*>(), 2329 CallStack, MutatedMemory, AllocaTmps); 2330 if (EvalSuccess) { 2331 // We succeeded at evaluation: commit the result. 2332 DOUT << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" 2333 << F->getName() << "' to " << MutatedMemory.size() 2334 << " stores.\n"; 2335 for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(), 2336 E = MutatedMemory.end(); I != E; ++I) 2337 CommitValueTo(I->second, I->first); 2338 } 2339 2340 // At this point, we are done interpreting. If we created any 'alloca' 2341 // temporaries, release them now. 2342 while (!AllocaTmps.empty()) { 2343 GlobalVariable *Tmp = AllocaTmps.back(); 2344 AllocaTmps.pop_back(); 2345 2346 // If there are still users of the alloca, the program is doing something 2347 // silly, e.g. storing the address of the alloca somewhere and using it 2348 // later. Since this is undefined, we'll just make it be null. 2349 if (!Tmp->use_empty()) 2350 Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); 2351 delete Tmp; 2352 } 2353 2354 return EvalSuccess; 2355} 2356 2357 2358 2359/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. 2360/// Return true if anything changed. 2361bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { 2362 std::vector<Function*> Ctors = ParseGlobalCtors(GCL); 2363 bool MadeChange = false; 2364 if (Ctors.empty()) return false; 2365 2366 // Loop over global ctors, optimizing them when we can. 2367 for (unsigned i = 0; i != Ctors.size(); ++i) { 2368 Function *F = Ctors[i]; 2369 // Found a null terminator in the middle of the list, prune off the rest of 2370 // the list. 2371 if (F == 0) { 2372 if (i != Ctors.size()-1) { 2373 Ctors.resize(i+1); 2374 MadeChange = true; 2375 } 2376 break; 2377 } 2378 2379 // We cannot simplify external ctor functions. 2380 if (F->empty()) continue; 2381 2382 // If we can evaluate the ctor at compile time, do. 2383 if (EvaluateStaticConstructor(F)) { 2384 Ctors.erase(Ctors.begin()+i); 2385 MadeChange = true; 2386 --i; 2387 ++NumCtorsEvaluated; 2388 continue; 2389 } 2390 } 2391 2392 if (!MadeChange) return false; 2393 2394 GCL = InstallGlobalCtors(GCL, Ctors); 2395 return true; 2396} 2397 2398bool GlobalOpt::ResolveAliases(Module &M) { 2399 bool Changed = false; 2400 2401 for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); 2402 I != E;) { 2403 Module::alias_iterator J = I++; 2404 // If the aliasee may change at link time, nothing can be done - bail out. 2405 if (J->mayBeOverridden()) 2406 continue; 2407 2408 Constant *Aliasee = J->getAliasee(); 2409 GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); 2410 Target->removeDeadConstantUsers(); 2411 bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse(); 2412 2413 // Make all users of the alias use the aliasee instead. 2414 if (!J->use_empty()) { 2415 J->replaceAllUsesWith(Aliasee); 2416 ++NumAliasesResolved; 2417 Changed = true; 2418 } 2419 2420 // If the aliasee has internal linkage, give it the name and linkage 2421 // of the alias, and delete the alias. This turns: 2422 // define internal ... @f(...) 2423 // @a = alias ... @f 2424 // into: 2425 // define ... @a(...) 2426 if (!Target->hasLocalLinkage()) 2427 continue; 2428 2429 // The transform is only useful if the alias does not have internal linkage. 2430 if (J->hasLocalLinkage()) 2431 continue; 2432 2433 // Do not perform the transform if multiple aliases potentially target the 2434 // aliasee. This check also ensures that it is safe to replace the section 2435 // and other attributes of the aliasee with those of the alias. 2436 if (!hasOneUse) 2437 continue; 2438 2439 // Give the aliasee the name, linkage and other attributes of the alias. 2440 Target->takeName(J); 2441 Target->setLinkage(J->getLinkage()); 2442 Target->GlobalValue::copyAttributesFrom(J); 2443 2444 // Delete the alias. 2445 M.getAliasList().erase(J); 2446 ++NumAliasesRemoved; 2447 Changed = true; 2448 } 2449 2450 return Changed; 2451} 2452 2453bool GlobalOpt::runOnModule(Module &M) { 2454 bool Changed = false; 2455 2456 // Try to find the llvm.globalctors list. 2457 GlobalVariable *GlobalCtors = FindGlobalCtors(M); 2458 2459 bool LocalChange = true; 2460 while (LocalChange) { 2461 LocalChange = false; 2462 2463 // Delete functions that are trivially dead, ccc -> fastcc 2464 LocalChange |= OptimizeFunctions(M); 2465 2466 // Optimize global_ctors list. 2467 if (GlobalCtors) 2468 LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); 2469 2470 // Optimize non-address-taken globals. 2471 LocalChange |= OptimizeGlobalVars(M); 2472 2473 // Resolve aliases, when possible. 2474 LocalChange |= ResolveAliases(M); 2475 Changed |= LocalChange; 2476 } 2477 2478 // TODO: Move all global ctors functions to the end of the module for code 2479 // layout. 2480 2481 return Changed; 2482} 2483