GlobalOpt.cpp revision 492d06efde44a4e38a6ed321ada4af5a75494df6
1//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass transforms simple global variables that never have their address 11// taken. If obviously true, it marks read/write globals as constant, deletes 12// variables only stored to, etc. 13// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "globalopt" 17#include "llvm/Transforms/IPO.h" 18#include "llvm/CallingConv.h" 19#include "llvm/Constants.h" 20#include "llvm/DerivedTypes.h" 21#include "llvm/Instructions.h" 22#include "llvm/IntrinsicInst.h" 23#include "llvm/LLVMContext.h" 24#include "llvm/Module.h" 25#include "llvm/Pass.h" 26#include "llvm/Analysis/ConstantFolding.h" 27#include "llvm/Analysis/MallocHelper.h" 28#include "llvm/Target/TargetData.h" 29#include "llvm/Support/CallSite.h" 30#include "llvm/Support/Compiler.h" 31#include "llvm/Support/Debug.h" 32#include "llvm/Support/ErrorHandling.h" 33#include "llvm/Support/GetElementPtrTypeIterator.h" 34#include "llvm/Support/MathExtras.h" 35#include "llvm/Support/raw_ostream.h" 36#include "llvm/ADT/DenseMap.h" 37#include "llvm/ADT/SmallPtrSet.h" 38#include "llvm/ADT/SmallVector.h" 39#include "llvm/ADT/Statistic.h" 40#include "llvm/ADT/STLExtras.h" 41#include <algorithm> 42using namespace llvm; 43 44STATISTIC(NumMarked , "Number of globals marked constant"); 45STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); 46STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); 47STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); 48STATISTIC(NumDeleted , "Number of globals deleted"); 49STATISTIC(NumFnDeleted , "Number of functions deleted"); 50STATISTIC(NumGlobUses , "Number of global uses devirtualized"); 51STATISTIC(NumLocalized , "Number of globals localized"); 52STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); 53STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); 54STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); 55STATISTIC(NumNestRemoved , "Number of nest attributes removed"); 56STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); 57STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); 58 59namespace { 60 struct GlobalOpt : public ModulePass { 61 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 62 } 63 static char ID; // Pass identification, replacement for typeid 64 GlobalOpt() : ModulePass(&ID) {} 65 66 bool runOnModule(Module &M); 67 68 private: 69 GlobalVariable *FindGlobalCtors(Module &M); 70 bool OptimizeFunctions(Module &M); 71 bool OptimizeGlobalVars(Module &M); 72 bool OptimizeGlobalAliases(Module &M); 73 bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); 74 bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI); 75 }; 76} 77 78char GlobalOpt::ID = 0; 79static RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer"); 80 81ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } 82 83namespace { 84 85/// GlobalStatus - As we analyze each global, keep track of some information 86/// about it. If we find out that the address of the global is taken, none of 87/// this info will be accurate. 88struct GlobalStatus { 89 /// isLoaded - True if the global is ever loaded. If the global isn't ever 90 /// loaded it can be deleted. 91 bool isLoaded; 92 93 /// StoredType - Keep track of what stores to the global look like. 94 /// 95 enum StoredType { 96 /// NotStored - There is no store to this global. It can thus be marked 97 /// constant. 98 NotStored, 99 100 /// isInitializerStored - This global is stored to, but the only thing 101 /// stored is the constant it was initialized with. This is only tracked 102 /// for scalar globals. 103 isInitializerStored, 104 105 /// isStoredOnce - This global is stored to, but only its initializer and 106 /// one other value is ever stored to it. If this global isStoredOnce, we 107 /// track the value stored to it in StoredOnceValue below. This is only 108 /// tracked for scalar globals. 109 isStoredOnce, 110 111 /// isStored - This global is stored to by multiple values or something else 112 /// that we cannot track. 113 isStored 114 } StoredType; 115 116 /// StoredOnceValue - If only one value (besides the initializer constant) is 117 /// ever stored to this global, keep track of what value it is. 118 Value *StoredOnceValue; 119 120 /// AccessingFunction/HasMultipleAccessingFunctions - These start out 121 /// null/false. When the first accessing function is noticed, it is recorded. 122 /// When a second different accessing function is noticed, 123 /// HasMultipleAccessingFunctions is set to true. 124 Function *AccessingFunction; 125 bool HasMultipleAccessingFunctions; 126 127 /// HasNonInstructionUser - Set to true if this global has a user that is not 128 /// an instruction (e.g. a constant expr or GV initializer). 129 bool HasNonInstructionUser; 130 131 /// HasPHIUser - Set to true if this global has a user that is a PHI node. 132 bool HasPHIUser; 133 134 GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0), 135 AccessingFunction(0), HasMultipleAccessingFunctions(false), 136 HasNonInstructionUser(false), HasPHIUser(false) {} 137}; 138 139} 140 141// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used 142// by constants itself. Note that constants cannot be cyclic, so this test is 143// pretty easy to implement recursively. 144// 145static bool SafeToDestroyConstant(Constant *C) { 146 if (isa<GlobalValue>(C)) return false; 147 148 for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) 149 if (Constant *CU = dyn_cast<Constant>(*UI)) { 150 if (!SafeToDestroyConstant(CU)) return false; 151 } else 152 return false; 153 return true; 154} 155 156 157/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus 158/// structure. If the global has its address taken, return true to indicate we 159/// can't do anything with it. 160/// 161static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, 162 SmallPtrSet<PHINode*, 16> &PHIUsers) { 163 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) 164 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { 165 GS.HasNonInstructionUser = true; 166 167 if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; 168 169 } else if (Instruction *I = dyn_cast<Instruction>(*UI)) { 170 if (!GS.HasMultipleAccessingFunctions) { 171 Function *F = I->getParent()->getParent(); 172 if (GS.AccessingFunction == 0) 173 GS.AccessingFunction = F; 174 else if (GS.AccessingFunction != F) 175 GS.HasMultipleAccessingFunctions = true; 176 } 177 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 178 GS.isLoaded = true; 179 if (LI->isVolatile()) return true; // Don't hack on volatile loads. 180 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 181 // Don't allow a store OF the address, only stores TO the address. 182 if (SI->getOperand(0) == V) return true; 183 184 if (SI->isVolatile()) return true; // Don't hack on volatile stores. 185 186 // If this is a direct store to the global (i.e., the global is a scalar 187 // value, not an aggregate), keep more specific information about 188 // stores. 189 if (GS.StoredType != GlobalStatus::isStored) { 190 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){ 191 Value *StoredVal = SI->getOperand(0); 192 if (StoredVal == GV->getInitializer()) { 193 if (GS.StoredType < GlobalStatus::isInitializerStored) 194 GS.StoredType = GlobalStatus::isInitializerStored; 195 } else if (isa<LoadInst>(StoredVal) && 196 cast<LoadInst>(StoredVal)->getOperand(0) == GV) { 197 // G = G 198 if (GS.StoredType < GlobalStatus::isInitializerStored) 199 GS.StoredType = GlobalStatus::isInitializerStored; 200 } else if (GS.StoredType < GlobalStatus::isStoredOnce) { 201 GS.StoredType = GlobalStatus::isStoredOnce; 202 GS.StoredOnceValue = StoredVal; 203 } else if (GS.StoredType == GlobalStatus::isStoredOnce && 204 GS.StoredOnceValue == StoredVal) { 205 // noop. 206 } else { 207 GS.StoredType = GlobalStatus::isStored; 208 } 209 } else { 210 GS.StoredType = GlobalStatus::isStored; 211 } 212 } 213 } else if (isa<GetElementPtrInst>(I)) { 214 if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 215 } else if (isa<SelectInst>(I)) { 216 if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 217 } else if (PHINode *PN = dyn_cast<PHINode>(I)) { 218 // PHI nodes we can check just like select or GEP instructions, but we 219 // have to be careful about infinite recursion. 220 if (PHIUsers.insert(PN)) // Not already visited. 221 if (AnalyzeGlobal(I, GS, PHIUsers)) return true; 222 GS.HasPHIUser = true; 223 } else if (isa<CmpInst>(I)) { 224 } else if (isa<MemTransferInst>(I)) { 225 if (I->getOperand(1) == V) 226 GS.StoredType = GlobalStatus::isStored; 227 if (I->getOperand(2) == V) 228 GS.isLoaded = true; 229 } else if (isa<MemSetInst>(I)) { 230 assert(I->getOperand(1) == V && "Memset only takes one pointer!"); 231 GS.StoredType = GlobalStatus::isStored; 232 } else { 233 return true; // Any other non-load instruction might take address! 234 } 235 } else if (Constant *C = dyn_cast<Constant>(*UI)) { 236 GS.HasNonInstructionUser = true; 237 // We might have a dead and dangling constant hanging off of here. 238 if (!SafeToDestroyConstant(C)) 239 return true; 240 } else { 241 GS.HasNonInstructionUser = true; 242 // Otherwise must be some other user. 243 return true; 244 } 245 246 return false; 247} 248 249static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx, 250 LLVMContext &Context) { 251 ConstantInt *CI = dyn_cast<ConstantInt>(Idx); 252 if (!CI) return 0; 253 unsigned IdxV = CI->getZExtValue(); 254 255 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) { 256 if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV); 257 } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) { 258 if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV); 259 } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) { 260 if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV); 261 } else if (isa<ConstantAggregateZero>(Agg)) { 262 if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { 263 if (IdxV < STy->getNumElements()) 264 return Constant::getNullValue(STy->getElementType(IdxV)); 265 } else if (const SequentialType *STy = 266 dyn_cast<SequentialType>(Agg->getType())) { 267 return Constant::getNullValue(STy->getElementType()); 268 } 269 } else if (isa<UndefValue>(Agg)) { 270 if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) { 271 if (IdxV < STy->getNumElements()) 272 return UndefValue::get(STy->getElementType(IdxV)); 273 } else if (const SequentialType *STy = 274 dyn_cast<SequentialType>(Agg->getType())) { 275 return UndefValue::get(STy->getElementType()); 276 } 277 } 278 return 0; 279} 280 281 282/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all 283/// users of the global, cleaning up the obvious ones. This is largely just a 284/// quick scan over the use list to clean up the easy and obvious cruft. This 285/// returns true if it made a change. 286static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, 287 LLVMContext &Context) { 288 bool Changed = false; 289 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { 290 User *U = *UI++; 291 292 if (LoadInst *LI = dyn_cast<LoadInst>(U)) { 293 if (Init) { 294 // Replace the load with the initializer. 295 LI->replaceAllUsesWith(Init); 296 LI->eraseFromParent(); 297 Changed = true; 298 } 299 } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 300 // Store must be unreachable or storing Init into the global. 301 SI->eraseFromParent(); 302 Changed = true; 303 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { 304 if (CE->getOpcode() == Instruction::GetElementPtr) { 305 Constant *SubInit = 0; 306 if (Init) 307 SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); 308 Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context); 309 } else if (CE->getOpcode() == Instruction::BitCast && 310 isa<PointerType>(CE->getType())) { 311 // Pointer cast, delete any stores and memsets to the global. 312 Changed |= CleanupConstantGlobalUsers(CE, 0, Context); 313 } 314 315 if (CE->use_empty()) { 316 CE->destroyConstant(); 317 Changed = true; 318 } 319 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { 320 // Do not transform "gepinst (gep constexpr (GV))" here, because forming 321 // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold 322 // and will invalidate our notion of what Init is. 323 Constant *SubInit = 0; 324 if (!isa<ConstantExpr>(GEP->getOperand(0))) { 325 ConstantExpr *CE = 326 dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, Context)); 327 if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) 328 SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); 329 } 330 Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context); 331 332 if (GEP->use_empty()) { 333 GEP->eraseFromParent(); 334 Changed = true; 335 } 336 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv 337 if (MI->getRawDest() == V) { 338 MI->eraseFromParent(); 339 Changed = true; 340 } 341 342 } else if (Constant *C = dyn_cast<Constant>(U)) { 343 // If we have a chain of dead constantexprs or other things dangling from 344 // us, and if they are all dead, nuke them without remorse. 345 if (SafeToDestroyConstant(C)) { 346 C->destroyConstant(); 347 // This could have invalidated UI, start over from scratch. 348 CleanupConstantGlobalUsers(V, Init, Context); 349 return true; 350 } 351 } 352 } 353 return Changed; 354} 355 356/// isSafeSROAElementUse - Return true if the specified instruction is a safe 357/// user of a derived expression from a global that we want to SROA. 358static bool isSafeSROAElementUse(Value *V) { 359 // We might have a dead and dangling constant hanging off of here. 360 if (Constant *C = dyn_cast<Constant>(V)) 361 return SafeToDestroyConstant(C); 362 363 Instruction *I = dyn_cast<Instruction>(V); 364 if (!I) return false; 365 366 // Loads are ok. 367 if (isa<LoadInst>(I)) return true; 368 369 // Stores *to* the pointer are ok. 370 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 371 return SI->getOperand(0) != V; 372 373 // Otherwise, it must be a GEP. 374 GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); 375 if (GEPI == 0) return false; 376 377 if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || 378 !cast<Constant>(GEPI->getOperand(1))->isNullValue()) 379 return false; 380 381 for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); 382 I != E; ++I) 383 if (!isSafeSROAElementUse(*I)) 384 return false; 385 return true; 386} 387 388 389/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value. 390/// Look at it and its uses and decide whether it is safe to SROA this global. 391/// 392static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { 393 // The user of the global must be a GEP Inst or a ConstantExpr GEP. 394 if (!isa<GetElementPtrInst>(U) && 395 (!isa<ConstantExpr>(U) || 396 cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) 397 return false; 398 399 // Check to see if this ConstantExpr GEP is SRA'able. In particular, we 400 // don't like < 3 operand CE's, and we don't like non-constant integer 401 // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some 402 // value of C. 403 if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) || 404 !cast<Constant>(U->getOperand(1))->isNullValue() || 405 !isa<ConstantInt>(U->getOperand(2))) 406 return false; 407 408 gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); 409 ++GEPI; // Skip over the pointer index. 410 411 // If this is a use of an array allocation, do a bit more checking for sanity. 412 if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) { 413 uint64_t NumElements = AT->getNumElements(); 414 ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); 415 416 // Check to make sure that index falls within the array. If not, 417 // something funny is going on, so we won't do the optimization. 418 // 419 if (Idx->getZExtValue() >= NumElements) 420 return false; 421 422 // We cannot scalar repl this level of the array unless any array 423 // sub-indices are in-range constants. In particular, consider: 424 // A[0][i]. We cannot know that the user isn't doing invalid things like 425 // allowing i to index an out-of-range subscript that accesses A[1]. 426 // 427 // Scalar replacing *just* the outer index of the array is probably not 428 // going to be a win anyway, so just give up. 429 for (++GEPI; // Skip array index. 430 GEPI != E; 431 ++GEPI) { 432 uint64_t NumElements; 433 if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI)) 434 NumElements = SubArrayTy->getNumElements(); 435 else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) 436 NumElements = SubVectorTy->getNumElements(); 437 else { 438 assert(isa<StructType>(*GEPI) && 439 "Indexed GEP type is not array, vector, or struct!"); 440 continue; 441 } 442 443 ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); 444 if (!IdxVal || IdxVal->getZExtValue() >= NumElements) 445 return false; 446 } 447 } 448 449 for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) 450 if (!isSafeSROAElementUse(*I)) 451 return false; 452 return true; 453} 454 455/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it 456/// is safe for us to perform this transformation. 457/// 458static bool GlobalUsersSafeToSRA(GlobalValue *GV) { 459 for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); 460 UI != E; ++UI) { 461 if (!IsUserOfGlobalSafeForSRA(*UI, GV)) 462 return false; 463 } 464 return true; 465} 466 467 468/// SRAGlobal - Perform scalar replacement of aggregates on the specified global 469/// variable. This opens the door for other optimizations by exposing the 470/// behavior of the program in a more fine-grained way. We have determined that 471/// this transformation is safe already. We return the first global variable we 472/// insert so that the caller can reprocess it. 473static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, 474 LLVMContext &Context) { 475 // Make sure this global only has simple uses that we can SRA. 476 if (!GlobalUsersSafeToSRA(GV)) 477 return 0; 478 479 assert(GV->hasLocalLinkage() && !GV->isConstant()); 480 Constant *Init = GV->getInitializer(); 481 const Type *Ty = Init->getType(); 482 483 std::vector<GlobalVariable*> NewGlobals; 484 Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); 485 486 // Get the alignment of the global, either explicit or target-specific. 487 unsigned StartAlignment = GV->getAlignment(); 488 if (StartAlignment == 0) 489 StartAlignment = TD.getABITypeAlignment(GV->getType()); 490 491 if (const StructType *STy = dyn_cast<StructType>(Ty)) { 492 NewGlobals.reserve(STy->getNumElements()); 493 const StructLayout &Layout = *TD.getStructLayout(STy); 494 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 495 Constant *In = getAggregateConstantElement(Init, 496 ConstantInt::get(Type::getInt32Ty(Context), i), 497 Context); 498 assert(In && "Couldn't get element of initializer?"); 499 GlobalVariable *NGV = new GlobalVariable(Context, 500 STy->getElementType(i), false, 501 GlobalVariable::InternalLinkage, 502 In, GV->getName()+"."+Twine(i), 503 GV->isThreadLocal(), 504 GV->getType()->getAddressSpace()); 505 Globals.insert(GV, NGV); 506 NewGlobals.push_back(NGV); 507 508 // Calculate the known alignment of the field. If the original aggregate 509 // had 256 byte alignment for example, something might depend on that: 510 // propagate info to each field. 511 uint64_t FieldOffset = Layout.getElementOffset(i); 512 unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); 513 if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) 514 NGV->setAlignment(NewAlign); 515 } 516 } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) { 517 unsigned NumElements = 0; 518 if (const ArrayType *ATy = dyn_cast<ArrayType>(STy)) 519 NumElements = ATy->getNumElements(); 520 else 521 NumElements = cast<VectorType>(STy)->getNumElements(); 522 523 if (NumElements > 16 && GV->hasNUsesOrMore(16)) 524 return 0; // It's not worth it. 525 NewGlobals.reserve(NumElements); 526 527 uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); 528 unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); 529 for (unsigned i = 0, e = NumElements; i != e; ++i) { 530 Constant *In = getAggregateConstantElement(Init, 531 ConstantInt::get(Type::getInt32Ty(Context), i), 532 Context); 533 assert(In && "Couldn't get element of initializer?"); 534 535 GlobalVariable *NGV = new GlobalVariable(Context, 536 STy->getElementType(), false, 537 GlobalVariable::InternalLinkage, 538 In, GV->getName()+"."+Twine(i), 539 GV->isThreadLocal(), 540 GV->getType()->getAddressSpace()); 541 Globals.insert(GV, NGV); 542 NewGlobals.push_back(NGV); 543 544 // Calculate the known alignment of the field. If the original aggregate 545 // had 256 byte alignment for example, something might depend on that: 546 // propagate info to each field. 547 unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); 548 if (NewAlign > EltAlign) 549 NGV->setAlignment(NewAlign); 550 } 551 } 552 553 if (NewGlobals.empty()) 554 return 0; 555 556 DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV); 557 558 Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context)); 559 560 // Loop over all of the uses of the global, replacing the constantexpr geps, 561 // with smaller constantexpr geps or direct references. 562 while (!GV->use_empty()) { 563 User *GEP = GV->use_back(); 564 assert(((isa<ConstantExpr>(GEP) && 565 cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| 566 isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); 567 568 // Ignore the 1th operand, which has to be zero or else the program is quite 569 // broken (undefined). Get the 2nd operand, which is the structure or array 570 // index. 571 unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); 572 if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. 573 574 Value *NewPtr = NewGlobals[Val]; 575 576 // Form a shorter GEP if needed. 577 if (GEP->getNumOperands() > 3) { 578 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) { 579 SmallVector<Constant*, 8> Idxs; 580 Idxs.push_back(NullInt); 581 for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) 582 Idxs.push_back(CE->getOperand(i)); 583 NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), 584 &Idxs[0], Idxs.size()); 585 } else { 586 GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); 587 SmallVector<Value*, 8> Idxs; 588 Idxs.push_back(NullInt); 589 for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) 590 Idxs.push_back(GEPI->getOperand(i)); 591 NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), 592 GEPI->getName()+"."+Twine(Val),GEPI); 593 } 594 } 595 GEP->replaceAllUsesWith(NewPtr); 596 597 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) 598 GEPI->eraseFromParent(); 599 else 600 cast<ConstantExpr>(GEP)->destroyConstant(); 601 } 602 603 // Delete the old global, now that it is dead. 604 Globals.erase(GV); 605 ++NumSRA; 606 607 // Loop over the new globals array deleting any globals that are obviously 608 // dead. This can arise due to scalarization of a structure or an array that 609 // has elements that are dead. 610 unsigned FirstGlobal = 0; 611 for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i) 612 if (NewGlobals[i]->use_empty()) { 613 Globals.erase(NewGlobals[i]); 614 if (FirstGlobal == i) ++FirstGlobal; 615 } 616 617 return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0; 618} 619 620/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified 621/// value will trap if the value is dynamically null. PHIs keeps track of any 622/// phi nodes we've seen to avoid reprocessing them. 623static bool AllUsesOfValueWillTrapIfNull(Value *V, 624 SmallPtrSet<PHINode*, 8> &PHIs) { 625 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) 626 if (isa<LoadInst>(*UI)) { 627 // Will trap. 628 } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { 629 if (SI->getOperand(0) == V) { 630 //cerr << "NONTRAPPING USE: " << **UI; 631 return false; // Storing the value. 632 } 633 } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { 634 if (CI->getOperand(0) != V) { 635 //cerr << "NONTRAPPING USE: " << **UI; 636 return false; // Not calling the ptr 637 } 638 } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { 639 if (II->getOperand(0) != V) { 640 //cerr << "NONTRAPPING USE: " << **UI; 641 return false; // Not calling the ptr 642 } 643 } else if (BitCastInst *CI = dyn_cast<BitCastInst>(*UI)) { 644 if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; 645 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) { 646 if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; 647 } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) { 648 // If we've already seen this phi node, ignore it, it has already been 649 // checked. 650 if (PHIs.insert(PN)) 651 return AllUsesOfValueWillTrapIfNull(PN, PHIs); 652 } else if (isa<ICmpInst>(*UI) && 653 isa<ConstantPointerNull>(UI->getOperand(1))) { 654 // Ignore setcc X, null 655 } else { 656 //cerr << "NONTRAPPING USE: " << **UI; 657 return false; 658 } 659 return true; 660} 661 662/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads 663/// from GV will trap if the loaded value is null. Note that this also permits 664/// comparisons of the loaded value against null, as a special case. 665static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { 666 for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI) 667 if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { 668 SmallPtrSet<PHINode*, 8> PHIs; 669 if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) 670 return false; 671 } else if (isa<StoreInst>(*UI)) { 672 // Ignore stores to the global. 673 } else { 674 // We don't know or understand this user, bail out. 675 //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI; 676 return false; 677 } 678 679 return true; 680} 681 682static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, 683 LLVMContext &Context) { 684 bool Changed = false; 685 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { 686 Instruction *I = cast<Instruction>(*UI++); 687 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 688 LI->setOperand(0, NewV); 689 Changed = true; 690 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 691 if (SI->getOperand(1) == V) { 692 SI->setOperand(1, NewV); 693 Changed = true; 694 } 695 } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { 696 if (I->getOperand(0) == V) { 697 // Calling through the pointer! Turn into a direct call, but be careful 698 // that the pointer is not also being passed as an argument. 699 I->setOperand(0, NewV); 700 Changed = true; 701 bool PassedAsArg = false; 702 for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) 703 if (I->getOperand(i) == V) { 704 PassedAsArg = true; 705 I->setOperand(i, NewV); 706 } 707 708 if (PassedAsArg) { 709 // Being passed as an argument also. Be careful to not invalidate UI! 710 UI = V->use_begin(); 711 } 712 } 713 } else if (CastInst *CI = dyn_cast<CastInst>(I)) { 714 Changed |= OptimizeAwayTrappingUsesOfValue(CI, 715 ConstantExpr::getCast(CI->getOpcode(), 716 NewV, CI->getType()), Context); 717 if (CI->use_empty()) { 718 Changed = true; 719 CI->eraseFromParent(); 720 } 721 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { 722 // Should handle GEP here. 723 SmallVector<Constant*, 8> Idxs; 724 Idxs.reserve(GEPI->getNumOperands()-1); 725 for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end(); 726 i != e; ++i) 727 if (Constant *C = dyn_cast<Constant>(*i)) 728 Idxs.push_back(C); 729 else 730 break; 731 if (Idxs.size() == GEPI->getNumOperands()-1) 732 Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, 733 ConstantExpr::getGetElementPtr(NewV, &Idxs[0], 734 Idxs.size()), Context); 735 if (GEPI->use_empty()) { 736 Changed = true; 737 GEPI->eraseFromParent(); 738 } 739 } 740 } 741 742 return Changed; 743} 744 745 746/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null 747/// value stored into it. If there are uses of the loaded value that would trap 748/// if the loaded value is dynamically null, then we know that they cannot be 749/// reachable with a null optimize away the load. 750static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, 751 LLVMContext &Context) { 752 bool Changed = false; 753 754 // Keep track of whether we are able to remove all the uses of the global 755 // other than the store that defines it. 756 bool AllNonStoreUsesGone = true; 757 758 // Replace all uses of loads with uses of uses of the stored value. 759 for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ 760 User *GlobalUser = *GUI++; 761 if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { 762 Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context); 763 // If we were able to delete all uses of the loads 764 if (LI->use_empty()) { 765 LI->eraseFromParent(); 766 Changed = true; 767 } else { 768 AllNonStoreUsesGone = false; 769 } 770 } else if (isa<StoreInst>(GlobalUser)) { 771 // Ignore the store that stores "LV" to the global. 772 assert(GlobalUser->getOperand(1) == GV && 773 "Must be storing *to* the global"); 774 } else { 775 AllNonStoreUsesGone = false; 776 777 // If we get here we could have other crazy uses that are transitively 778 // loaded. 779 assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || 780 isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!"); 781 } 782 } 783 784 if (Changed) { 785 DEBUG(errs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); 786 ++NumGlobUses; 787 } 788 789 // If we nuked all of the loads, then none of the stores are needed either, 790 // nor is the global. 791 if (AllNonStoreUsesGone) { 792 DEBUG(errs() << " *** GLOBAL NOW DEAD!\n"); 793 CleanupConstantGlobalUsers(GV, 0, Context); 794 if (GV->use_empty()) { 795 GV->eraseFromParent(); 796 ++NumDeleted; 797 } 798 Changed = true; 799 } 800 return Changed; 801} 802 803/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the 804/// instructions that are foldable. 805static void ConstantPropUsersOf(Value *V, LLVMContext &Context) { 806 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) 807 if (Instruction *I = dyn_cast<Instruction>(*UI++)) 808 if (Constant *NewC = ConstantFoldInstruction(I, Context)) { 809 I->replaceAllUsesWith(NewC); 810 811 // Advance UI to the next non-I use to avoid invalidating it! 812 // Instructions could multiply use V. 813 while (UI != E && *UI == I) 814 ++UI; 815 I->eraseFromParent(); 816 } 817} 818 819/// OptimizeGlobalAddressOfMalloc - This function takes the specified global 820/// variable, and transforms the program as if it always contained the result of 821/// the specified malloc. Because it is always the result of the specified 822/// malloc, there is no reason to actually DO the malloc. Instead, turn the 823/// malloc into a global, and any loads of GV as uses of the new global. 824static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, 825 CallInst *CI, 826 BitCastInst *BCI, 827 LLVMContext &Context, 828 TargetData* TD) { 829 DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV 830 << " CALL = " << *CI << " BCI = " << *BCI << '\n'); 831 832 const Type *IntPtrTy = TD->getIntPtrType(Context); 833 834 Value* ArraySize = getMallocArraySize(CI, Context, TD); 835 assert(ArraySize && "not a malloc whose array size can be determined"); 836 ConstantInt *NElements = cast<ConstantInt>(ArraySize); 837 if (NElements->getZExtValue() != 1) { 838 // If we have an array allocation, transform it to a single element 839 // allocation to make the code below simpler. 840 Type *NewTy = ArrayType::get(getMallocAllocatedType(CI), 841 NElements->getZExtValue()); 842 Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy); 843 Instruction* NewMI = cast<Instruction>(NewM); 844 Value* Indices[2]; 845 Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy); 846 Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, 847 NewMI->getName()+".el0", CI); 848 BCI->replaceAllUsesWith(NewGEP); 849 BCI->eraseFromParent(); 850 CI->eraseFromParent(); 851 BCI = cast<BitCastInst>(NewMI); 852 CI = extractMallocCallFromBitCast(NewMI); 853 } 854 855 // Create the new global variable. The contents of the malloc'd memory is 856 // undefined, so initialize with an undef value. 857 const Type *MAT = getMallocAllocatedType(CI); 858 Constant *Init = UndefValue::get(MAT); 859 GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), 860 MAT, false, 861 GlobalValue::InternalLinkage, Init, 862 GV->getName()+".body", 863 GV, 864 GV->isThreadLocal()); 865 866 // Anything that used the malloc now uses the global directly. 867 BCI->replaceAllUsesWith(NewGV); 868 869 Constant *RepValue = NewGV; 870 if (NewGV->getType() != GV->getType()->getElementType()) 871 RepValue = ConstantExpr::getBitCast(RepValue, 872 GV->getType()->getElementType()); 873 874 // If there is a comparison against null, we will insert a global bool to 875 // keep track of whether the global was initialized yet or not. 876 GlobalVariable *InitBool = 877 new GlobalVariable(Context, Type::getInt1Ty(Context), false, 878 GlobalValue::InternalLinkage, 879 ConstantInt::getFalse(Context), GV->getName()+".init", 880 GV->isThreadLocal()); 881 bool InitBoolUsed = false; 882 883 // Loop over all uses of GV, processing them in turn. 884 std::vector<StoreInst*> Stores; 885 while (!GV->use_empty()) 886 if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { 887 while (!LI->use_empty()) { 888 Use &LoadUse = LI->use_begin().getUse(); 889 if (!isa<ICmpInst>(LoadUse.getUser())) 890 LoadUse = RepValue; 891 else { 892 ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); 893 // Replace the cmp X, 0 with a use of the bool value. 894 Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); 895 InitBoolUsed = true; 896 switch (ICI->getPredicate()) { 897 default: llvm_unreachable("Unknown ICmp Predicate!"); 898 case ICmpInst::ICMP_ULT: 899 case ICmpInst::ICMP_SLT: 900 LV = ConstantInt::getFalse(Context); // X < null -> always false 901 break; 902 case ICmpInst::ICMP_ULE: 903 case ICmpInst::ICMP_SLE: 904 case ICmpInst::ICMP_EQ: 905 LV = BinaryOperator::CreateNot(LV, "notinit", ICI); 906 break; 907 case ICmpInst::ICMP_NE: 908 case ICmpInst::ICMP_UGE: 909 case ICmpInst::ICMP_SGE: 910 case ICmpInst::ICMP_UGT: 911 case ICmpInst::ICMP_SGT: 912 break; // no change. 913 } 914 ICI->replaceAllUsesWith(LV); 915 ICI->eraseFromParent(); 916 } 917 } 918 LI->eraseFromParent(); 919 } else { 920 StoreInst *SI = cast<StoreInst>(GV->use_back()); 921 // The global is initialized when the store to it occurs. 922 new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); 923 SI->eraseFromParent(); 924 } 925 926 // If the initialization boolean was used, insert it, otherwise delete it. 927 if (!InitBoolUsed) { 928 while (!InitBool->use_empty()) // Delete initializations 929 cast<Instruction>(InitBool->use_back())->eraseFromParent(); 930 delete InitBool; 931 } else 932 GV->getParent()->getGlobalList().insert(GV, InitBool); 933 934 935 // Now the GV is dead, nuke it and the malloc. 936 GV->eraseFromParent(); 937 BCI->eraseFromParent(); 938 CI->eraseFromParent(); 939 940 // To further other optimizations, loop over all users of NewGV and try to 941 // constant prop them. This will promote GEP instructions with constant 942 // indices into GEP constant-exprs, which will allow global-opt to hack on it. 943 ConstantPropUsersOf(NewGV, Context); 944 if (RepValue != NewGV) 945 ConstantPropUsersOf(RepValue, Context); 946 947 return NewGV; 948} 949 950/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking 951/// to make sure that there are no complex uses of V. We permit simple things 952/// like dereferencing the pointer, but not storing through the address, unless 953/// it is to the specified global. 954static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, 955 GlobalVariable *GV, 956 SmallPtrSet<PHINode*, 8> &PHIs) { 957 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 958 Instruction *Inst = cast<Instruction>(*UI); 959 960 if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { 961 continue; // Fine, ignore. 962 } 963 964 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 965 if (SI->getOperand(0) == V && SI->getOperand(1) != GV) 966 return false; // Storing the pointer itself... bad. 967 continue; // Otherwise, storing through it, or storing into GV... fine. 968 } 969 970 if (isa<GetElementPtrInst>(Inst)) { 971 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) 972 return false; 973 continue; 974 } 975 976 if (PHINode *PN = dyn_cast<PHINode>(Inst)) { 977 // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI 978 // cycles. 979 if (PHIs.insert(PN)) 980 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)) 981 return false; 982 continue; 983 } 984 985 if (BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { 986 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) 987 return false; 988 continue; 989 } 990 991 return false; 992 } 993 return true; 994} 995 996/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV 997/// somewhere. Transform all uses of the allocation into loads from the 998/// global and uses of the resultant pointer. Further, delete the store into 999/// GV. This assumes that these value pass the 1000/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. 1001static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, 1002 GlobalVariable *GV) { 1003 while (!Alloc->use_empty()) { 1004 Instruction *U = cast<Instruction>(*Alloc->use_begin()); 1005 Instruction *InsertPt = U; 1006 if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 1007 // If this is the store of the allocation into the global, remove it. 1008 if (SI->getOperand(1) == GV) { 1009 SI->eraseFromParent(); 1010 continue; 1011 } 1012 } else if (PHINode *PN = dyn_cast<PHINode>(U)) { 1013 // Insert the load in the corresponding predecessor, not right before the 1014 // PHI. 1015 InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator(); 1016 } else if (isa<BitCastInst>(U)) { 1017 // Must be bitcast between the malloc and store to initialize the global. 1018 ReplaceUsesOfMallocWithGlobal(U, GV); 1019 U->eraseFromParent(); 1020 continue; 1021 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { 1022 // If this is a "GEP bitcast" and the user is a store to the global, then 1023 // just process it as a bitcast. 1024 if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse()) 1025 if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back())) 1026 if (SI->getOperand(1) == GV) { 1027 // Must be bitcast GEP between the malloc and store to initialize 1028 // the global. 1029 ReplaceUsesOfMallocWithGlobal(GEPI, GV); 1030 GEPI->eraseFromParent(); 1031 continue; 1032 } 1033 } 1034 1035 // Insert a load from the global, and use it instead of the malloc. 1036 Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); 1037 U->replaceUsesOfWith(Alloc, NL); 1038 } 1039} 1040 1041/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi 1042/// of a load) are simple enough to perform heap SRA on. This permits GEP's 1043/// that index through the array and struct field, icmps of null, and PHIs. 1044static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, 1045 SmallPtrSet<PHINode*, 32> &LoadUsingPHIs, 1046 SmallPtrSet<PHINode*, 32> &LoadUsingPHIsPerLoad) { 1047 // We permit two users of the load: setcc comparing against the null 1048 // pointer, and a getelementptr of a specific form. 1049 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 1050 Instruction *User = cast<Instruction>(*UI); 1051 1052 // Comparison against null is ok. 1053 if (ICmpInst *ICI = dyn_cast<ICmpInst>(User)) { 1054 if (!isa<ConstantPointerNull>(ICI->getOperand(1))) 1055 return false; 1056 continue; 1057 } 1058 1059 // getelementptr is also ok, but only a simple form. 1060 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { 1061 // Must index into the array and into the struct. 1062 if (GEPI->getNumOperands() < 3) 1063 return false; 1064 1065 // Otherwise the GEP is ok. 1066 continue; 1067 } 1068 1069 if (PHINode *PN = dyn_cast<PHINode>(User)) { 1070 if (!LoadUsingPHIsPerLoad.insert(PN)) 1071 // This means some phi nodes are dependent on each other. 1072 // Avoid infinite looping! 1073 return false; 1074 if (!LoadUsingPHIs.insert(PN)) 1075 // If we have already analyzed this PHI, then it is safe. 1076 continue; 1077 1078 // Make sure all uses of the PHI are simple enough to transform. 1079 if (!LoadUsesSimpleEnoughForHeapSRA(PN, 1080 LoadUsingPHIs, LoadUsingPHIsPerLoad)) 1081 return false; 1082 1083 continue; 1084 } 1085 1086 // Otherwise we don't know what this is, not ok. 1087 return false; 1088 } 1089 1090 return true; 1091} 1092 1093 1094/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from 1095/// GV are simple enough to perform HeapSRA, return true. 1096static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, 1097 Instruction *StoredVal) { 1098 SmallPtrSet<PHINode*, 32> LoadUsingPHIs; 1099 SmallPtrSet<PHINode*, 32> LoadUsingPHIsPerLoad; 1100 for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; 1101 ++UI) 1102 if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { 1103 if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, 1104 LoadUsingPHIsPerLoad)) 1105 return false; 1106 LoadUsingPHIsPerLoad.clear(); 1107 } 1108 1109 // If we reach here, we know that all uses of the loads and transitive uses 1110 // (through PHI nodes) are simple enough to transform. However, we don't know 1111 // that all inputs the to the PHI nodes are in the same equivalence sets. 1112 // Check to verify that all operands of the PHIs are either PHIS that can be 1113 // transformed, loads from GV, or MI itself. 1114 for (SmallPtrSet<PHINode*, 32>::iterator I = LoadUsingPHIs.begin(), 1115 E = LoadUsingPHIs.end(); I != E; ++I) { 1116 PHINode *PN = *I; 1117 for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { 1118 Value *InVal = PN->getIncomingValue(op); 1119 1120 // PHI of the stored value itself is ok. 1121 if (InVal == StoredVal) continue; 1122 1123 if (PHINode *InPN = dyn_cast<PHINode>(InVal)) { 1124 // One of the PHIs in our set is (optimistically) ok. 1125 if (LoadUsingPHIs.count(InPN)) 1126 continue; 1127 return false; 1128 } 1129 1130 // Load from GV is ok. 1131 if (LoadInst *LI = dyn_cast<LoadInst>(InVal)) 1132 if (LI->getOperand(0) == GV) 1133 continue; 1134 1135 // UNDEF? NULL? 1136 1137 // Anything else is rejected. 1138 return false; 1139 } 1140 } 1141 1142 return true; 1143} 1144 1145static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, 1146 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1147 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, 1148 LLVMContext &Context) { 1149 std::vector<Value*> &FieldVals = InsertedScalarizedValues[V]; 1150 1151 if (FieldNo >= FieldVals.size()) 1152 FieldVals.resize(FieldNo+1); 1153 1154 // If we already have this value, just reuse the previously scalarized 1155 // version. 1156 if (Value *FieldVal = FieldVals[FieldNo]) 1157 return FieldVal; 1158 1159 // Depending on what instruction this is, we have several cases. 1160 Value *Result; 1161 if (LoadInst *LI = dyn_cast<LoadInst>(V)) { 1162 // This is a scalarized version of the load from the global. Just create 1163 // a new Load of the scalarized global. 1164 Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, 1165 InsertedScalarizedValues, 1166 PHIsToRewrite, Context), 1167 LI->getName()+".f"+Twine(FieldNo), LI); 1168 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 1169 // PN's type is pointer to struct. Make a new PHI of pointer to struct 1170 // field. 1171 const StructType *ST = 1172 cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); 1173 1174 Result = 1175 PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), 1176 PN->getName()+".f"+Twine(FieldNo), PN); 1177 PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); 1178 } else { 1179 llvm_unreachable("Unknown usable value"); 1180 Result = 0; 1181 } 1182 1183 return FieldVals[FieldNo] = Result; 1184} 1185 1186/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from 1187/// the load, rewrite the derived value to use the HeapSRoA'd load. 1188static void RewriteHeapSROALoadUser(Instruction *LoadUser, 1189 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1190 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, 1191 LLVMContext &Context) { 1192 // If this is a comparison against null, handle it. 1193 if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) { 1194 assert(isa<ConstantPointerNull>(SCI->getOperand(1))); 1195 // If we have a setcc of the loaded pointer, we can use a setcc of any 1196 // field. 1197 Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, 1198 InsertedScalarizedValues, PHIsToRewrite, 1199 Context); 1200 1201 Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, 1202 Constant::getNullValue(NPtr->getType()), 1203 SCI->getName()); 1204 SCI->replaceAllUsesWith(New); 1205 SCI->eraseFromParent(); 1206 return; 1207 } 1208 1209 // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' 1210 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) { 1211 assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) 1212 && "Unexpected GEPI!"); 1213 1214 // Load the pointer for this field. 1215 unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); 1216 Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, 1217 InsertedScalarizedValues, PHIsToRewrite, 1218 Context); 1219 1220 // Create the new GEP idx vector. 1221 SmallVector<Value*, 8> GEPIdx; 1222 GEPIdx.push_back(GEPI->getOperand(1)); 1223 GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); 1224 1225 Value *NGEPI = GetElementPtrInst::Create(NewPtr, 1226 GEPIdx.begin(), GEPIdx.end(), 1227 GEPI->getName(), GEPI); 1228 GEPI->replaceAllUsesWith(NGEPI); 1229 GEPI->eraseFromParent(); 1230 return; 1231 } 1232 1233 // Recursively transform the users of PHI nodes. This will lazily create the 1234 // PHIs that are needed for individual elements. Keep track of what PHIs we 1235 // see in InsertedScalarizedValues so that we don't get infinite loops (very 1236 // antisocial). If the PHI is already in InsertedScalarizedValues, it has 1237 // already been seen first by another load, so its uses have already been 1238 // processed. 1239 PHINode *PN = cast<PHINode>(LoadUser); 1240 bool Inserted; 1241 DenseMap<Value*, std::vector<Value*> >::iterator InsertPos; 1242 tie(InsertPos, Inserted) = 1243 InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>())); 1244 if (!Inserted) return; 1245 1246 // If this is the first time we've seen this PHI, recursively process all 1247 // users. 1248 for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { 1249 Instruction *User = cast<Instruction>(*UI++); 1250 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, 1251 Context); 1252 } 1253} 1254 1255/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr 1256/// is a value loaded from the global. Eliminate all uses of Ptr, making them 1257/// use FieldGlobals instead. All uses of loaded values satisfy 1258/// AllGlobalLoadUsesSimpleEnoughForHeapSRA. 1259static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, 1260 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, 1261 std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, 1262 LLVMContext &Context) { 1263 for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); 1264 UI != E; ) { 1265 Instruction *User = cast<Instruction>(*UI++); 1266 RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, 1267 Context); 1268 } 1269 1270 if (Load->use_empty()) { 1271 Load->eraseFromParent(); 1272 InsertedScalarizedValues.erase(Load); 1273 } 1274} 1275 1276/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break 1277/// it up into multiple allocations of arrays of the fields. 1278static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, 1279 CallInst *CI, BitCastInst* BCI, 1280 LLVMContext &Context, 1281 TargetData *TD){ 1282 DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI 1283 << " BITCAST = " << *BCI << '\n'); 1284 const Type* MAT = getMallocAllocatedType(CI); 1285 const StructType *STy = cast<StructType>(MAT); 1286 Value* ArraySize = getMallocArraySize(CI, Context, TD); 1287 assert(ArraySize && "not a malloc whose array size can be determined"); 1288 1289 // There is guaranteed to be at least one use of the malloc (storing 1290 // it into GV). If there are other uses, change them to be uses of 1291 // the global to simplify later code. This also deletes the store 1292 // into GV. 1293 ReplaceUsesOfMallocWithGlobal(BCI, GV); 1294 1295 // Okay, at this point, there are no users of the malloc. Insert N 1296 // new mallocs at the same place as CI, and N globals. 1297 std::vector<Value*> FieldGlobals; 1298 std::vector<Value*> FieldMallocs; 1299 1300 for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ 1301 const Type *FieldTy = STy->getElementType(FieldNo); 1302 const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); 1303 1304 GlobalVariable *NGV = 1305 new GlobalVariable(*GV->getParent(), 1306 PFieldTy, false, GlobalValue::InternalLinkage, 1307 Constant::getNullValue(PFieldTy), 1308 GV->getName() + ".f" + Twine(FieldNo), GV, 1309 GV->isThreadLocal()); 1310 FieldGlobals.push_back(NGV); 1311 1312 Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), 1313 FieldTy, ArraySize, 1314 BCI->getName() + ".f" + Twine(FieldNo)); 1315 FieldMallocs.push_back(NMI); 1316 new StoreInst(NMI, NGV, BCI); 1317 } 1318 1319 // The tricky aspect of this transformation is handling the case when malloc 1320 // fails. In the original code, malloc failing would set the result pointer 1321 // of malloc to null. In this case, some mallocs could succeed and others 1322 // could fail. As such, we emit code that looks like this: 1323 // F0 = malloc(field0) 1324 // F1 = malloc(field1) 1325 // F2 = malloc(field2) 1326 // if (F0 == 0 || F1 == 0 || F2 == 0) { 1327 // if (F0) { free(F0); F0 = 0; } 1328 // if (F1) { free(F1); F1 = 0; } 1329 // if (F2) { free(F2); F2 = 0; } 1330 // } 1331 Value *RunningOr = 0; 1332 for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { 1333 Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i], 1334 Constant::getNullValue(FieldMallocs[i]->getType()), 1335 "isnull"); 1336 if (!RunningOr) 1337 RunningOr = Cond; // First seteq 1338 else 1339 RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI); 1340 } 1341 1342 // Split the basic block at the old malloc. 1343 BasicBlock *OrigBB = BCI->getParent(); 1344 BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont"); 1345 1346 // Create the block to check the first condition. Put all these blocks at the 1347 // end of the function as they are unlikely to be executed. 1348 BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", 1349 OrigBB->getParent()); 1350 1351 // Remove the uncond branch from OrigBB to ContBB, turning it into a cond 1352 // branch on RunningOr. 1353 OrigBB->getTerminator()->eraseFromParent(); 1354 BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); 1355 1356 // Within the NullPtrBlock, we need to emit a comparison and branch for each 1357 // pointer, because some may be null while others are not. 1358 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { 1359 Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); 1360 Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, 1361 Constant::getNullValue(GVVal->getType()), 1362 "tmp"); 1363 BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", 1364 OrigBB->getParent()); 1365 BasicBlock *NextBlock = BasicBlock::Create(Context, "next", 1366 OrigBB->getParent()); 1367 Instruction *BI = BranchInst::Create(FreeBlock, NextBlock, 1368 Cmp, NullPtrBlock); 1369 1370 // Fill in FreeBlock. 1371 CallInst::CreateFree(GVVal, BI); 1372 new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], 1373 FreeBlock); 1374 BranchInst::Create(NextBlock, FreeBlock); 1375 1376 NullPtrBlock = NextBlock; 1377 } 1378 1379 BranchInst::Create(ContBB, NullPtrBlock); 1380 1381 // CI and BCI are no longer needed, remove them. 1382 BCI->eraseFromParent(); 1383 CI->eraseFromParent(); 1384 1385 /// InsertedScalarizedLoads - As we process loads, if we can't immediately 1386 /// update all uses of the load, keep track of what scalarized loads are 1387 /// inserted for a given load. 1388 DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; 1389 InsertedScalarizedValues[GV] = FieldGlobals; 1390 1391 std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; 1392 1393 // Okay, the malloc site is completely handled. All of the uses of GV are now 1394 // loads, and all uses of those loads are simple. Rewrite them to use loads 1395 // of the per-field globals instead. 1396 for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { 1397 Instruction *User = cast<Instruction>(*UI++); 1398 1399 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1400 RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, 1401 Context); 1402 continue; 1403 } 1404 1405 // Must be a store of null. 1406 StoreInst *SI = cast<StoreInst>(User); 1407 assert(isa<ConstantPointerNull>(SI->getOperand(0)) && 1408 "Unexpected heap-sra user!"); 1409 1410 // Insert a store of null into each global. 1411 for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { 1412 const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); 1413 Constant *Null = Constant::getNullValue(PT->getElementType()); 1414 new StoreInst(Null, FieldGlobals[i], SI); 1415 } 1416 // Erase the original store. 1417 SI->eraseFromParent(); 1418 } 1419 1420 // While we have PHIs that are interesting to rewrite, do it. 1421 while (!PHIsToRewrite.empty()) { 1422 PHINode *PN = PHIsToRewrite.back().first; 1423 unsigned FieldNo = PHIsToRewrite.back().second; 1424 PHIsToRewrite.pop_back(); 1425 PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); 1426 assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); 1427 1428 // Add all the incoming values. This can materialize more phis. 1429 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1430 Value *InVal = PN->getIncomingValue(i); 1431 InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, 1432 PHIsToRewrite, Context); 1433 FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); 1434 } 1435 } 1436 1437 // Drop all inter-phi links and any loads that made it this far. 1438 for (DenseMap<Value*, std::vector<Value*> >::iterator 1439 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); 1440 I != E; ++I) { 1441 if (PHINode *PN = dyn_cast<PHINode>(I->first)) 1442 PN->dropAllReferences(); 1443 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) 1444 LI->dropAllReferences(); 1445 } 1446 1447 // Delete all the phis and loads now that inter-references are dead. 1448 for (DenseMap<Value*, std::vector<Value*> >::iterator 1449 I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); 1450 I != E; ++I) { 1451 if (PHINode *PN = dyn_cast<PHINode>(I->first)) 1452 PN->eraseFromParent(); 1453 else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) 1454 LI->eraseFromParent(); 1455 } 1456 1457 // The old global is now dead, remove it. 1458 GV->eraseFromParent(); 1459 1460 ++NumHeapSRA; 1461 return cast<GlobalVariable>(FieldGlobals[0]); 1462} 1463 1464/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a 1465/// pointer global variable with a single value stored it that is a malloc or 1466/// cast of malloc. 1467static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, 1468 CallInst *CI, 1469 BitCastInst *BCI, 1470 Module::global_iterator &GVI, 1471 TargetData *TD, 1472 LLVMContext &Context) { 1473 // If we can't figure out the type being malloced, then we can't optimize. 1474 const Type *AllocTy = getMallocAllocatedType(CI); 1475 assert(AllocTy); 1476 1477 // If this is a malloc of an abstract type, don't touch it. 1478 if (!AllocTy->isSized()) 1479 return false; 1480 1481 // We can't optimize this global unless all uses of it are *known* to be 1482 // of the malloc value, not of the null initializer value (consider a use 1483 // that compares the global's value against zero to see if the malloc has 1484 // been reached). To do this, we check to see if all uses of the global 1485 // would trap if the global were null: this proves that they must all 1486 // happen after the malloc. 1487 if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) 1488 return false; 1489 1490 // We can't optimize this if the malloc itself is used in a complex way, 1491 // for example, being stored into multiple globals. This allows the 1492 // malloc to be stored into the specified global, loaded setcc'd, and 1493 // GEP'd. These are all things we could transform to using the global 1494 // for. 1495 { 1496 SmallPtrSet<PHINode*, 8> PHIs; 1497 if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) 1498 return false; 1499 } 1500 1501 // If we have a global that is only initialized with a fixed size malloc, 1502 // transform the program to use global memory instead of malloc'd memory. 1503 // This eliminates dynamic allocation, avoids an indirection accessing the 1504 // data, and exposes the resultant global to further GlobalOpt. 1505 Value *NElems = getMallocArraySize(CI, Context, TD); 1506 // We cannot optimize the malloc if we cannot determine malloc array size. 1507 if (NElems) { 1508 if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) 1509 // Restrict this transformation to only working on small allocations 1510 // (2048 bytes currently), as we don't want to introduce a 16M global or 1511 // something. 1512 if (TD && 1513 NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { 1514 GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); 1515 return true; 1516 } 1517 1518 // If the allocation is an array of structures, consider transforming this 1519 // into multiple malloc'd arrays, one for each field. This is basically 1520 // SRoA for malloc'd memory. 1521 1522 // If this is an allocation of a fixed size array of structs, analyze as a 1523 // variable size array. malloc [100 x struct],1 -> malloc struct, 100 1524 if (!isArrayMalloc(CI, Context, TD)) 1525 if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) 1526 AllocTy = AT->getElementType(); 1527 1528 if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { 1529 // This the structure has an unreasonable number of fields, leave it 1530 // alone. 1531 if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && 1532 AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { 1533 1534 // If this is a fixed size array, transform the Malloc to be an alloc of 1535 // structs. malloc [100 x struct],1 -> malloc struct, 100 1536 if (const ArrayType *AT = 1537 dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { 1538 Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), 1539 AT->getNumElements()); 1540 Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), 1541 AllocSTy, NumElements, 1542 BCI->getName()); 1543 Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); 1544 BCI->replaceAllUsesWith(Cast); 1545 BCI->eraseFromParent(); 1546 CI->eraseFromParent(); 1547 BCI = cast<BitCastInst>(NewMI); 1548 CI = extractMallocCallFromBitCast(NewMI); 1549 } 1550 1551 GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); 1552 return true; 1553 } 1554 } 1555 } 1556 1557 return false; 1558} 1559 1560// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge 1561// that only one value (besides its initializer) is ever stored to the global. 1562static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, 1563 Module::global_iterator &GVI, 1564 TargetData *TD, LLVMContext &Context) { 1565 // Ignore no-op GEPs and bitcasts. 1566 StoredOnceVal = StoredOnceVal->stripPointerCasts(); 1567 1568 // If we are dealing with a pointer global that is initialized to null and 1569 // only has one (non-null) value stored into it, then we can optimize any 1570 // users of the loaded value (often calls and loads) that would trap if the 1571 // value was null. 1572 if (isa<PointerType>(GV->getInitializer()->getType()) && 1573 GV->getInitializer()->isNullValue()) { 1574 if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { 1575 if (GV->getInitializer()->getType() != SOVC->getType()) 1576 SOVC = 1577 ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); 1578 1579 // Optimize away any trapping uses of the loaded value. 1580 if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context)) 1581 return true; 1582 } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { 1583 if (getMallocAllocatedType(CI)) { 1584 BitCastInst* BCI = NULL; 1585 for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); 1586 UI != E; ) 1587 BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)); 1588 if (BCI && 1589 TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context)) 1590 return true; 1591 } 1592 } 1593 } 1594 1595 return false; 1596} 1597 1598/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only 1599/// two values ever stored into GV are its initializer and OtherVal. See if we 1600/// can shrink the global into a boolean and select between the two values 1601/// whenever it is used. This exposes the values to other scalar optimizations. 1602static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, 1603 LLVMContext &Context) { 1604 const Type *GVElType = GV->getType()->getElementType(); 1605 1606 // If GVElType is already i1, it is already shrunk. If the type of the GV is 1607 // an FP value, pointer or vector, don't do this optimization because a select 1608 // between them is very expensive and unlikely to lead to later 1609 // simplification. In these cases, we typically end up with "cond ? v1 : v2" 1610 // where v1 and v2 both require constant pool loads, a big loss. 1611 if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() || 1612 isa<PointerType>(GVElType) || isa<VectorType>(GVElType)) 1613 return false; 1614 1615 // Walk the use list of the global seeing if all the uses are load or store. 1616 // If there is anything else, bail out. 1617 for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I) 1618 if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) 1619 return false; 1620 1621 DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV); 1622 1623 // Create the new global, initializing it to false. 1624 GlobalVariable *NewGV = new GlobalVariable(Context, 1625 Type::getInt1Ty(Context), false, 1626 GlobalValue::InternalLinkage, ConstantInt::getFalse(Context), 1627 GV->getName()+".b", 1628 GV->isThreadLocal()); 1629 GV->getParent()->getGlobalList().insert(GV, NewGV); 1630 1631 Constant *InitVal = GV->getInitializer(); 1632 assert(InitVal->getType() != Type::getInt1Ty(Context) && 1633 "No reason to shrink to bool!"); 1634 1635 // If initialized to zero and storing one into the global, we can use a cast 1636 // instead of a select to synthesize the desired value. 1637 bool IsOneZero = false; 1638 if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) 1639 IsOneZero = InitVal->isNullValue() && CI->isOne(); 1640 1641 while (!GV->use_empty()) { 1642 Instruction *UI = cast<Instruction>(GV->use_back()); 1643 if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { 1644 // Change the store into a boolean store. 1645 bool StoringOther = SI->getOperand(0) == OtherVal; 1646 // Only do this if we weren't storing a loaded value. 1647 Value *StoreVal; 1648 if (StoringOther || SI->getOperand(0) == InitVal) 1649 StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther); 1650 else { 1651 // Otherwise, we are storing a previously loaded copy. To do this, 1652 // change the copy from copying the original value to just copying the 1653 // bool. 1654 Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); 1655 1656 // If we're already replaced the input, StoredVal will be a cast or 1657 // select instruction. If not, it will be a load of the original 1658 // global. 1659 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { 1660 assert(LI->getOperand(0) == GV && "Not a copy!"); 1661 // Insert a new load, to preserve the saved value. 1662 StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI); 1663 } else { 1664 assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && 1665 "This is not a form that we understand!"); 1666 StoreVal = StoredVal->getOperand(0); 1667 assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); 1668 } 1669 } 1670 new StoreInst(StoreVal, NewGV, SI); 1671 } else { 1672 // Change the load into a load of bool then a select. 1673 LoadInst *LI = cast<LoadInst>(UI); 1674 LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI); 1675 Value *NSI; 1676 if (IsOneZero) 1677 NSI = new ZExtInst(NLI, LI->getType(), "", LI); 1678 else 1679 NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI); 1680 NSI->takeName(LI); 1681 LI->replaceAllUsesWith(NSI); 1682 } 1683 UI->eraseFromParent(); 1684 } 1685 1686 GV->eraseFromParent(); 1687 return true; 1688} 1689 1690 1691/// ProcessInternalGlobal - Analyze the specified global variable and optimize 1692/// it if possible. If we make a change, return true. 1693bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, 1694 Module::global_iterator &GVI) { 1695 SmallPtrSet<PHINode*, 16> PHIUsers; 1696 GlobalStatus GS; 1697 GV->removeDeadConstantUsers(); 1698 1699 if (GV->use_empty()) { 1700 DEBUG(errs() << "GLOBAL DEAD: " << *GV); 1701 GV->eraseFromParent(); 1702 ++NumDeleted; 1703 return true; 1704 } 1705 1706 if (!AnalyzeGlobal(GV, GS, PHIUsers)) { 1707#if 0 1708 cerr << "Global: " << *GV; 1709 cerr << " isLoaded = " << GS.isLoaded << "\n"; 1710 cerr << " StoredType = "; 1711 switch (GS.StoredType) { 1712 case GlobalStatus::NotStored: cerr << "NEVER STORED\n"; break; 1713 case GlobalStatus::isInitializerStored: cerr << "INIT STORED\n"; break; 1714 case GlobalStatus::isStoredOnce: cerr << "STORED ONCE\n"; break; 1715 case GlobalStatus::isStored: cerr << "stored\n"; break; 1716 } 1717 if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) 1718 cerr << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"; 1719 if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) 1720 cerr << " AccessingFunction = " << GS.AccessingFunction->getName() 1721 << "\n"; 1722 cerr << " HasMultipleAccessingFunctions = " 1723 << GS.HasMultipleAccessingFunctions << "\n"; 1724 cerr << " HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n"; 1725 cerr << "\n"; 1726#endif 1727 1728 // If this is a first class global and has only one accessing function 1729 // and this function is main (which we know is not recursive we can make 1730 // this global a local variable) we replace the global with a local alloca 1731 // in this function. 1732 // 1733 // NOTE: It doesn't make sense to promote non single-value types since we 1734 // are just replacing static memory to stack memory. 1735 // 1736 // If the global is in different address space, don't bring it to stack. 1737 if (!GS.HasMultipleAccessingFunctions && 1738 GS.AccessingFunction && !GS.HasNonInstructionUser && 1739 GV->getType()->getElementType()->isSingleValueType() && 1740 GS.AccessingFunction->getName() == "main" && 1741 GS.AccessingFunction->hasExternalLinkage() && 1742 GV->getType()->getAddressSpace() == 0) { 1743 DEBUG(errs() << "LOCALIZING GLOBAL: " << *GV); 1744 Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin(); 1745 const Type* ElemTy = GV->getType()->getElementType(); 1746 // FIXME: Pass Global's alignment when globals have alignment 1747 AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), FirstI); 1748 if (!isa<UndefValue>(GV->getInitializer())) 1749 new StoreInst(GV->getInitializer(), Alloca, FirstI); 1750 1751 GV->replaceAllUsesWith(Alloca); 1752 GV->eraseFromParent(); 1753 ++NumLocalized; 1754 return true; 1755 } 1756 1757 // If the global is never loaded (but may be stored to), it is dead. 1758 // Delete it now. 1759 if (!GS.isLoaded) { 1760 DEBUG(errs() << "GLOBAL NEVER LOADED: " << *GV); 1761 1762 // Delete any stores we can find to the global. We may not be able to 1763 // make it completely dead though. 1764 bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), 1765 GV->getContext()); 1766 1767 // If the global is dead now, delete it. 1768 if (GV->use_empty()) { 1769 GV->eraseFromParent(); 1770 ++NumDeleted; 1771 Changed = true; 1772 } 1773 return Changed; 1774 1775 } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { 1776 DEBUG(errs() << "MARKING CONSTANT: " << *GV); 1777 GV->setConstant(true); 1778 1779 // Clean up any obviously simplifiable users now. 1780 CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext()); 1781 1782 // If the global is dead now, just nuke it. 1783 if (GV->use_empty()) { 1784 DEBUG(errs() << " *** Marking constant allowed us to simplify " 1785 << "all users and delete global!\n"); 1786 GV->eraseFromParent(); 1787 ++NumDeleted; 1788 } 1789 1790 ++NumMarked; 1791 return true; 1792 } else if (!GV->getInitializer()->getType()->isSingleValueType()) { 1793 if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) 1794 if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD, 1795 GV->getContext())) { 1796 GVI = FirstNewGV; // Don't skip the newly produced globals! 1797 return true; 1798 } 1799 } else if (GS.StoredType == GlobalStatus::isStoredOnce) { 1800 // If the initial value for the global was an undef value, and if only 1801 // one other value was stored into it, we can just change the 1802 // initializer to be the stored value, then delete all stores to the 1803 // global. This allows us to mark it constant. 1804 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) 1805 if (isa<UndefValue>(GV->getInitializer())) { 1806 // Change the initial value here. 1807 GV->setInitializer(SOVConstant); 1808 1809 // Clean up any obviously simplifiable users now. 1810 CleanupConstantGlobalUsers(GV, GV->getInitializer(), 1811 GV->getContext()); 1812 1813 if (GV->use_empty()) { 1814 DEBUG(errs() << " *** Substituting initializer allowed us to " 1815 << "simplify all users and delete global!\n"); 1816 GV->eraseFromParent(); 1817 ++NumDeleted; 1818 } else { 1819 GVI = GV; 1820 } 1821 ++NumSubstitute; 1822 return true; 1823 } 1824 1825 // Try to optimize globals based on the knowledge that only one value 1826 // (besides its initializer) is ever stored to the global. 1827 if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, 1828 getAnalysisIfAvailable<TargetData>(), 1829 GV->getContext())) 1830 return true; 1831 1832 // Otherwise, if the global was not a boolean, we can shrink it to be a 1833 // boolean. 1834 if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) 1835 if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) { 1836 ++NumShrunkToBool; 1837 return true; 1838 } 1839 } 1840 } 1841 return false; 1842} 1843 1844/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified 1845/// function, changing them to FastCC. 1846static void ChangeCalleesToFastCall(Function *F) { 1847 for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ 1848 CallSite User(cast<Instruction>(*UI)); 1849 User.setCallingConv(CallingConv::Fast); 1850 } 1851} 1852 1853static AttrListPtr StripNest(const AttrListPtr &Attrs) { 1854 for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { 1855 if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0) 1856 continue; 1857 1858 // There can be only one. 1859 return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest); 1860 } 1861 1862 return Attrs; 1863} 1864 1865static void RemoveNestAttribute(Function *F) { 1866 F->setAttributes(StripNest(F->getAttributes())); 1867 for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ 1868 CallSite User(cast<Instruction>(*UI)); 1869 User.setAttributes(StripNest(User.getAttributes())); 1870 } 1871} 1872 1873bool GlobalOpt::OptimizeFunctions(Module &M) { 1874 bool Changed = false; 1875 // Optimize functions. 1876 for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { 1877 Function *F = FI++; 1878 // Functions without names cannot be referenced outside this module. 1879 if (!F->hasName() && !F->isDeclaration()) 1880 F->setLinkage(GlobalValue::InternalLinkage); 1881 F->removeDeadConstantUsers(); 1882 if (F->use_empty() && (F->hasLocalLinkage() || 1883 F->hasLinkOnceLinkage())) { 1884 M.getFunctionList().erase(F); 1885 Changed = true; 1886 ++NumFnDeleted; 1887 } else if (F->hasLocalLinkage()) { 1888 if (F->getCallingConv() == CallingConv::C && !F->isVarArg() && 1889 !F->hasAddressTaken()) { 1890 // If this function has C calling conventions, is not a varargs 1891 // function, and is only called directly, promote it to use the Fast 1892 // calling convention. 1893 F->setCallingConv(CallingConv::Fast); 1894 ChangeCalleesToFastCall(F); 1895 ++NumFastCallFns; 1896 Changed = true; 1897 } 1898 1899 if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && 1900 !F->hasAddressTaken()) { 1901 // The function is not used by a trampoline intrinsic, so it is safe 1902 // to remove the 'nest' attribute. 1903 RemoveNestAttribute(F); 1904 ++NumNestRemoved; 1905 Changed = true; 1906 } 1907 } 1908 } 1909 return Changed; 1910} 1911 1912bool GlobalOpt::OptimizeGlobalVars(Module &M) { 1913 bool Changed = false; 1914 for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); 1915 GVI != E; ) { 1916 GlobalVariable *GV = GVI++; 1917 // Global variables without names cannot be referenced outside this module. 1918 if (!GV->hasName() && !GV->isDeclaration()) 1919 GV->setLinkage(GlobalValue::InternalLinkage); 1920 if (!GV->isConstant() && GV->hasLocalLinkage() && 1921 GV->hasInitializer()) 1922 Changed |= ProcessInternalGlobal(GV, GVI); 1923 } 1924 return Changed; 1925} 1926 1927/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all 1928/// initializers have an init priority of 65535. 1929GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { 1930 for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 1931 I != E; ++I) 1932 if (I->getName() == "llvm.global_ctors") { 1933 // Found it, verify it's an array of { int, void()* }. 1934 const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType()); 1935 if (!ATy) return 0; 1936 const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); 1937 if (!STy || STy->getNumElements() != 2 || 1938 STy->getElementType(0) != Type::getInt32Ty(M.getContext())) return 0; 1939 const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); 1940 if (!PFTy) return 0; 1941 const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); 1942 if (!FTy || FTy->getReturnType() != Type::getVoidTy(M.getContext()) || 1943 FTy->isVarArg() || FTy->getNumParams() != 0) 1944 return 0; 1945 1946 // Verify that the initializer is simple enough for us to handle. 1947 if (!I->hasDefinitiveInitializer()) return 0; 1948 ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer()); 1949 if (!CA) return 0; 1950 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) 1951 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) { 1952 if (isa<ConstantPointerNull>(CS->getOperand(1))) 1953 continue; 1954 1955 // Must have a function or null ptr. 1956 if (!isa<Function>(CS->getOperand(1))) 1957 return 0; 1958 1959 // Init priority must be standard. 1960 ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); 1961 if (!CI || CI->getZExtValue() != 65535) 1962 return 0; 1963 } else { 1964 return 0; 1965 } 1966 1967 return I; 1968 } 1969 return 0; 1970} 1971 1972/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, 1973/// return a list of the functions and null terminator as a vector. 1974static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { 1975 ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); 1976 std::vector<Function*> Result; 1977 Result.reserve(CA->getNumOperands()); 1978 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { 1979 ConstantStruct *CS = cast<ConstantStruct>(*i); 1980 Result.push_back(dyn_cast<Function>(CS->getOperand(1))); 1981 } 1982 return Result; 1983} 1984 1985/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the 1986/// specified array, returning the new global to use. 1987static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, 1988 const std::vector<Function*> &Ctors, 1989 LLVMContext &Context) { 1990 // If we made a change, reassemble the initializer list. 1991 std::vector<Constant*> CSVals; 1992 CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535)); 1993 CSVals.push_back(0); 1994 1995 // Create the new init list. 1996 std::vector<Constant*> CAList; 1997 for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { 1998 if (Ctors[i]) { 1999 CSVals[1] = Ctors[i]; 2000 } else { 2001 const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false); 2002 const PointerType *PFTy = PointerType::getUnqual(FTy); 2003 CSVals[1] = Constant::getNullValue(PFTy); 2004 CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647); 2005 } 2006 CAList.push_back(ConstantStruct::get(Context, CSVals, false)); 2007 } 2008 2009 // Create the array initializer. 2010 const Type *StructTy = 2011 cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); 2012 Constant *CA = ConstantArray::get(ArrayType::get(StructTy, 2013 CAList.size()), CAList); 2014 2015 // If we didn't change the number of elements, don't create a new GV. 2016 if (CA->getType() == GCL->getInitializer()->getType()) { 2017 GCL->setInitializer(CA); 2018 return GCL; 2019 } 2020 2021 // Create the new global and insert it next to the existing list. 2022 GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(), 2023 GCL->isConstant(), 2024 GCL->getLinkage(), CA, "", 2025 GCL->isThreadLocal()); 2026 GCL->getParent()->getGlobalList().insert(GCL, NGV); 2027 NGV->takeName(GCL); 2028 2029 // Nuke the old list, replacing any uses with the new one. 2030 if (!GCL->use_empty()) { 2031 Constant *V = NGV; 2032 if (V->getType() != GCL->getType()) 2033 V = ConstantExpr::getBitCast(V, GCL->getType()); 2034 GCL->replaceAllUsesWith(V); 2035 } 2036 GCL->eraseFromParent(); 2037 2038 if (Ctors.size()) 2039 return NGV; 2040 else 2041 return 0; 2042} 2043 2044 2045static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, 2046 Value *V) { 2047 if (Constant *CV = dyn_cast<Constant>(V)) return CV; 2048 Constant *R = ComputedValues[V]; 2049 assert(R && "Reference to an uncomputed value!"); 2050 return R; 2051} 2052 2053/// isSimpleEnoughPointerToCommit - Return true if this constant is simple 2054/// enough for us to understand. In particular, if it is a cast of something, 2055/// we punt. We basically just support direct accesses to globals and GEP's of 2056/// globals. This should be kept up to date with CommitValueTo. 2057static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) { 2058 // Conservatively, avoid aggregate types. This is because we don't 2059 // want to worry about them partially overlapping other stores. 2060 if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) 2061 return false; 2062 2063 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) 2064 // Do not allow weak/linkonce/dllimport/dllexport linkage or 2065 // external globals. 2066 return GV->hasDefinitiveInitializer(); 2067 2068 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) 2069 // Handle a constantexpr gep. 2070 if (CE->getOpcode() == Instruction::GetElementPtr && 2071 isa<GlobalVariable>(CE->getOperand(0)) && 2072 cast<GEPOperator>(CE)->isInBounds()) { 2073 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2074 // Do not allow weak/linkonce/dllimport/dllexport linkage or 2075 // external globals. 2076 if (!GV->hasDefinitiveInitializer()) 2077 return false; 2078 2079 // The first index must be zero. 2080 ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin())); 2081 if (!CI || !CI->isZero()) return false; 2082 2083 // The remaining indices must be compile-time known integers within the 2084 // notional bounds of the corresponding static array types. 2085 if (!CE->isGEPWithNoNotionalOverIndexing()) 2086 return false; 2087 2088 return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); 2089 } 2090 return false; 2091} 2092 2093/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global 2094/// initializer. This returns 'Init' modified to reflect 'Val' stored into it. 2095/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. 2096static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, 2097 ConstantExpr *Addr, unsigned OpNo, 2098 LLVMContext &Context) { 2099 // Base case of the recursion. 2100 if (OpNo == Addr->getNumOperands()) { 2101 assert(Val->getType() == Init->getType() && "Type mismatch!"); 2102 return Val; 2103 } 2104 2105 if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { 2106 std::vector<Constant*> Elts; 2107 2108 // Break up the constant into its elements. 2109 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { 2110 for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i) 2111 Elts.push_back(cast<Constant>(*i)); 2112 } else if (isa<ConstantAggregateZero>(Init)) { 2113 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) 2114 Elts.push_back(Constant::getNullValue(STy->getElementType(i))); 2115 } else if (isa<UndefValue>(Init)) { 2116 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) 2117 Elts.push_back(UndefValue::get(STy->getElementType(i))); 2118 } else { 2119 llvm_unreachable("This code is out of sync with " 2120 " ConstantFoldLoadThroughGEPConstantExpr"); 2121 } 2122 2123 // Replace the element that we are supposed to. 2124 ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); 2125 unsigned Idx = CU->getZExtValue(); 2126 assert(Idx < STy->getNumElements() && "Struct index out of range!"); 2127 Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context); 2128 2129 // Return the modified struct. 2130 return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked()); 2131 } else { 2132 ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); 2133 const ArrayType *ATy = cast<ArrayType>(Init->getType()); 2134 2135 // Break up the array into elements. 2136 std::vector<Constant*> Elts; 2137 if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { 2138 for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) 2139 Elts.push_back(cast<Constant>(*i)); 2140 } else if (isa<ConstantAggregateZero>(Init)) { 2141 Constant *Elt = Constant::getNullValue(ATy->getElementType()); 2142 Elts.assign(ATy->getNumElements(), Elt); 2143 } else if (isa<UndefValue>(Init)) { 2144 Constant *Elt = UndefValue::get(ATy->getElementType()); 2145 Elts.assign(ATy->getNumElements(), Elt); 2146 } else { 2147 llvm_unreachable("This code is out of sync with " 2148 " ConstantFoldLoadThroughGEPConstantExpr"); 2149 } 2150 2151 assert(CI->getZExtValue() < ATy->getNumElements()); 2152 Elts[CI->getZExtValue()] = 2153 EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context); 2154 return ConstantArray::get(ATy, Elts); 2155 } 2156} 2157 2158/// CommitValueTo - We have decided that Addr (which satisfies the predicate 2159/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. 2160static void CommitValueTo(Constant *Val, Constant *Addr, 2161 LLVMContext &Context) { 2162 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 2163 assert(GV->hasInitializer()); 2164 GV->setInitializer(Val); 2165 return; 2166 } 2167 2168 ConstantExpr *CE = cast<ConstantExpr>(Addr); 2169 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2170 2171 Constant *Init = GV->getInitializer(); 2172 Init = EvaluateStoreInto(Init, Val, CE, 2, Context); 2173 GV->setInitializer(Init); 2174} 2175 2176/// ComputeLoadResult - Return the value that would be computed by a load from 2177/// P after the stores reflected by 'memory' have been performed. If we can't 2178/// decide, return null. 2179static Constant *ComputeLoadResult(Constant *P, 2180 const DenseMap<Constant*, Constant*> &Memory, 2181 LLVMContext &Context) { 2182 // If this memory location has been recently stored, use the stored value: it 2183 // is the most up-to-date. 2184 DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); 2185 if (I != Memory.end()) return I->second; 2186 2187 // Access it. 2188 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { 2189 if (GV->hasDefinitiveInitializer()) 2190 return GV->getInitializer(); 2191 return 0; 2192 } 2193 2194 // Handle a constantexpr getelementptr. 2195 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) 2196 if (CE->getOpcode() == Instruction::GetElementPtr && 2197 isa<GlobalVariable>(CE->getOperand(0))) { 2198 GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); 2199 if (GV->hasDefinitiveInitializer()) 2200 return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); 2201 } 2202 2203 return 0; // don't know how to evaluate. 2204} 2205 2206/// EvaluateFunction - Evaluate a call to function F, returning true if 2207/// successful, false if we can't evaluate it. ActualArgs contains the formal 2208/// arguments for the function. 2209static bool EvaluateFunction(Function *F, Constant *&RetVal, 2210 const SmallVectorImpl<Constant*> &ActualArgs, 2211 std::vector<Function*> &CallStack, 2212 DenseMap<Constant*, Constant*> &MutatedMemory, 2213 std::vector<GlobalVariable*> &AllocaTmps) { 2214 // Check to see if this function is already executing (recursion). If so, 2215 // bail out. TODO: we might want to accept limited recursion. 2216 if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) 2217 return false; 2218 2219 LLVMContext &Context = F->getContext(); 2220 2221 CallStack.push_back(F); 2222 2223 /// Values - As we compute SSA register values, we store their contents here. 2224 DenseMap<Value*, Constant*> Values; 2225 2226 // Initialize arguments to the incoming values specified. 2227 unsigned ArgNo = 0; 2228 for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; 2229 ++AI, ++ArgNo) 2230 Values[AI] = ActualArgs[ArgNo]; 2231 2232 /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such, 2233 /// we can only evaluate any one basic block at most once. This set keeps 2234 /// track of what we have executed so we can detect recursive cases etc. 2235 SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; 2236 2237 // CurInst - The current instruction we're evaluating. 2238 BasicBlock::iterator CurInst = F->begin()->begin(); 2239 2240 // This is the main evaluation loop. 2241 while (1) { 2242 Constant *InstResult = 0; 2243 2244 if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { 2245 if (SI->isVolatile()) return false; // no volatile accesses. 2246 Constant *Ptr = getVal(Values, SI->getOperand(1)); 2247 if (!isSimpleEnoughPointerToCommit(Ptr, Context)) 2248 // If this is too complex for us to commit, reject it. 2249 return false; 2250 Constant *Val = getVal(Values, SI->getOperand(0)); 2251 MutatedMemory[Ptr] = Val; 2252 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { 2253 InstResult = ConstantExpr::get(BO->getOpcode(), 2254 getVal(Values, BO->getOperand(0)), 2255 getVal(Values, BO->getOperand(1))); 2256 } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { 2257 InstResult = ConstantExpr::getCompare(CI->getPredicate(), 2258 getVal(Values, CI->getOperand(0)), 2259 getVal(Values, CI->getOperand(1))); 2260 } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { 2261 InstResult = ConstantExpr::getCast(CI->getOpcode(), 2262 getVal(Values, CI->getOperand(0)), 2263 CI->getType()); 2264 } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { 2265 InstResult = 2266 ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), 2267 getVal(Values, SI->getOperand(1)), 2268 getVal(Values, SI->getOperand(2))); 2269 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { 2270 Constant *P = getVal(Values, GEP->getOperand(0)); 2271 SmallVector<Constant*, 8> GEPOps; 2272 for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); 2273 i != e; ++i) 2274 GEPOps.push_back(getVal(Values, *i)); 2275 InstResult = cast<GEPOperator>(GEP)->isInBounds() ? 2276 ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) : 2277 ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); 2278 } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { 2279 if (LI->isVolatile()) return false; // no volatile accesses. 2280 InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), 2281 MutatedMemory, Context); 2282 if (InstResult == 0) return false; // Could not evaluate load. 2283 } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { 2284 if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. 2285 const Type *Ty = AI->getType()->getElementType(); 2286 AllocaTmps.push_back(new GlobalVariable(Context, Ty, false, 2287 GlobalValue::InternalLinkage, 2288 UndefValue::get(Ty), 2289 AI->getName())); 2290 InstResult = AllocaTmps.back(); 2291 } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) { 2292 2293 // Debug info can safely be ignored here. 2294 if (isa<DbgInfoIntrinsic>(CI)) { 2295 ++CurInst; 2296 continue; 2297 } 2298 2299 // Cannot handle inline asm. 2300 if (isa<InlineAsm>(CI->getOperand(0))) return false; 2301 2302 // Resolve function pointers. 2303 Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0))); 2304 if (!Callee) return false; // Cannot resolve. 2305 2306 SmallVector<Constant*, 8> Formals; 2307 for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); 2308 i != e; ++i) 2309 Formals.push_back(getVal(Values, *i)); 2310 2311 if (Callee->isDeclaration()) { 2312 // If this is a function we can constant fold, do it. 2313 if (Constant *C = ConstantFoldCall(Callee, Formals.data(), 2314 Formals.size())) { 2315 InstResult = C; 2316 } else { 2317 return false; 2318 } 2319 } else { 2320 if (Callee->getFunctionType()->isVarArg()) 2321 return false; 2322 2323 Constant *RetVal; 2324 // Execute the call, if successful, use the return value. 2325 if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, 2326 MutatedMemory, AllocaTmps)) 2327 return false; 2328 InstResult = RetVal; 2329 } 2330 } else if (isa<TerminatorInst>(CurInst)) { 2331 BasicBlock *NewBB = 0; 2332 if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { 2333 if (BI->isUnconditional()) { 2334 NewBB = BI->getSuccessor(0); 2335 } else { 2336 ConstantInt *Cond = 2337 dyn_cast<ConstantInt>(getVal(Values, BI->getCondition())); 2338 if (!Cond) return false; // Cannot determine. 2339 2340 NewBB = BI->getSuccessor(!Cond->getZExtValue()); 2341 } 2342 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { 2343 ConstantInt *Val = 2344 dyn_cast<ConstantInt>(getVal(Values, SI->getCondition())); 2345 if (!Val) return false; // Cannot determine. 2346 NewBB = SI->getSuccessor(SI->findCaseValue(Val)); 2347 } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) { 2348 if (RI->getNumOperands()) 2349 RetVal = getVal(Values, RI->getOperand(0)); 2350 2351 CallStack.pop_back(); // return from fn. 2352 return true; // We succeeded at evaluating this ctor! 2353 } else { 2354 // invoke, unwind, unreachable. 2355 return false; // Cannot handle this terminator. 2356 } 2357 2358 // Okay, we succeeded in evaluating this control flow. See if we have 2359 // executed the new block before. If so, we have a looping function, 2360 // which we cannot evaluate in reasonable time. 2361 if (!ExecutedBlocks.insert(NewBB)) 2362 return false; // looped! 2363 2364 // Okay, we have never been in this block before. Check to see if there 2365 // are any PHI nodes. If so, evaluate them with information about where 2366 // we came from. 2367 BasicBlock *OldBB = CurInst->getParent(); 2368 CurInst = NewBB->begin(); 2369 PHINode *PN; 2370 for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) 2371 Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB)); 2372 2373 // Do NOT increment CurInst. We know that the terminator had no value. 2374 continue; 2375 } else { 2376 // Did not know how to evaluate this! 2377 return false; 2378 } 2379 2380 if (!CurInst->use_empty()) 2381 Values[CurInst] = InstResult; 2382 2383 // Advance program counter. 2384 ++CurInst; 2385 } 2386} 2387 2388/// EvaluateStaticConstructor - Evaluate static constructors in the function, if 2389/// we can. Return true if we can, false otherwise. 2390static bool EvaluateStaticConstructor(Function *F) { 2391 /// MutatedMemory - For each store we execute, we update this map. Loads 2392 /// check this to get the most up-to-date value. If evaluation is successful, 2393 /// this state is committed to the process. 2394 DenseMap<Constant*, Constant*> MutatedMemory; 2395 2396 /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable 2397 /// to represent its body. This vector is needed so we can delete the 2398 /// temporary globals when we are done. 2399 std::vector<GlobalVariable*> AllocaTmps; 2400 2401 /// CallStack - This is used to detect recursion. In pathological situations 2402 /// we could hit exponential behavior, but at least there is nothing 2403 /// unbounded. 2404 std::vector<Function*> CallStack; 2405 2406 // Call the function. 2407 Constant *RetValDummy; 2408 bool EvalSuccess = EvaluateFunction(F, RetValDummy, 2409 SmallVector<Constant*, 0>(), CallStack, 2410 MutatedMemory, AllocaTmps); 2411 if (EvalSuccess) { 2412 // We succeeded at evaluation: commit the result. 2413 DEBUG(errs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" 2414 << F->getName() << "' to " << MutatedMemory.size() 2415 << " stores.\n"); 2416 for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(), 2417 E = MutatedMemory.end(); I != E; ++I) 2418 CommitValueTo(I->second, I->first, F->getContext()); 2419 } 2420 2421 // At this point, we are done interpreting. If we created any 'alloca' 2422 // temporaries, release them now. 2423 while (!AllocaTmps.empty()) { 2424 GlobalVariable *Tmp = AllocaTmps.back(); 2425 AllocaTmps.pop_back(); 2426 2427 // If there are still users of the alloca, the program is doing something 2428 // silly, e.g. storing the address of the alloca somewhere and using it 2429 // later. Since this is undefined, we'll just make it be null. 2430 if (!Tmp->use_empty()) 2431 Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); 2432 delete Tmp; 2433 } 2434 2435 return EvalSuccess; 2436} 2437 2438 2439 2440/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. 2441/// Return true if anything changed. 2442bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { 2443 std::vector<Function*> Ctors = ParseGlobalCtors(GCL); 2444 bool MadeChange = false; 2445 if (Ctors.empty()) return false; 2446 2447 // Loop over global ctors, optimizing them when we can. 2448 for (unsigned i = 0; i != Ctors.size(); ++i) { 2449 Function *F = Ctors[i]; 2450 // Found a null terminator in the middle of the list, prune off the rest of 2451 // the list. 2452 if (F == 0) { 2453 if (i != Ctors.size()-1) { 2454 Ctors.resize(i+1); 2455 MadeChange = true; 2456 } 2457 break; 2458 } 2459 2460 // We cannot simplify external ctor functions. 2461 if (F->empty()) continue; 2462 2463 // If we can evaluate the ctor at compile time, do. 2464 if (EvaluateStaticConstructor(F)) { 2465 Ctors.erase(Ctors.begin()+i); 2466 MadeChange = true; 2467 --i; 2468 ++NumCtorsEvaluated; 2469 continue; 2470 } 2471 } 2472 2473 if (!MadeChange) return false; 2474 2475 GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext()); 2476 return true; 2477} 2478 2479bool GlobalOpt::OptimizeGlobalAliases(Module &M) { 2480 bool Changed = false; 2481 2482 for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); 2483 I != E;) { 2484 Module::alias_iterator J = I++; 2485 // Aliases without names cannot be referenced outside this module. 2486 if (!J->hasName() && !J->isDeclaration()) 2487 J->setLinkage(GlobalValue::InternalLinkage); 2488 // If the aliasee may change at link time, nothing can be done - bail out. 2489 if (J->mayBeOverridden()) 2490 continue; 2491 2492 Constant *Aliasee = J->getAliasee(); 2493 GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); 2494 Target->removeDeadConstantUsers(); 2495 bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse(); 2496 2497 // Make all users of the alias use the aliasee instead. 2498 if (!J->use_empty()) { 2499 J->replaceAllUsesWith(Aliasee); 2500 ++NumAliasesResolved; 2501 Changed = true; 2502 } 2503 2504 // If the aliasee has internal linkage, give it the name and linkage 2505 // of the alias, and delete the alias. This turns: 2506 // define internal ... @f(...) 2507 // @a = alias ... @f 2508 // into: 2509 // define ... @a(...) 2510 if (!Target->hasLocalLinkage()) 2511 continue; 2512 2513 // The transform is only useful if the alias does not have internal linkage. 2514 if (J->hasLocalLinkage()) 2515 continue; 2516 2517 // Do not perform the transform if multiple aliases potentially target the 2518 // aliasee. This check also ensures that it is safe to replace the section 2519 // and other attributes of the aliasee with those of the alias. 2520 if (!hasOneUse) 2521 continue; 2522 2523 // Give the aliasee the name, linkage and other attributes of the alias. 2524 Target->takeName(J); 2525 Target->setLinkage(J->getLinkage()); 2526 Target->GlobalValue::copyAttributesFrom(J); 2527 2528 // Delete the alias. 2529 M.getAliasList().erase(J); 2530 ++NumAliasesRemoved; 2531 Changed = true; 2532 } 2533 2534 return Changed; 2535} 2536 2537bool GlobalOpt::runOnModule(Module &M) { 2538 bool Changed = false; 2539 2540 // Try to find the llvm.globalctors list. 2541 GlobalVariable *GlobalCtors = FindGlobalCtors(M); 2542 2543 bool LocalChange = true; 2544 while (LocalChange) { 2545 LocalChange = false; 2546 2547 // Delete functions that are trivially dead, ccc -> fastcc 2548 LocalChange |= OptimizeFunctions(M); 2549 2550 // Optimize global_ctors list. 2551 if (GlobalCtors) 2552 LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); 2553 2554 // Optimize non-address-taken globals. 2555 LocalChange |= OptimizeGlobalVars(M); 2556 2557 // Resolve aliases, when possible. 2558 LocalChange |= OptimizeGlobalAliases(M); 2559 Changed |= LocalChange; 2560 } 2561 2562 // TODO: Move all global ctors functions to the end of the module for code 2563 // layout. 2564 2565 return Changed; 2566} 2567