GlobalOpt.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken.  When this is obviously the case, it marks read/write globals as
// constant, deletes variables that are only stored to, etc.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <deque>
using namespace llvm;

#define DEBUG_TYPE "globalopt"

STATISTIC(NumMarked    , "Number of globals marked constant");
STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted   , "Number of globals deleted");
STATISTIC(NumFnDeleted , "Number of functions deleted");
STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");

namespace {
  struct GlobalOpt : public ModulePass {
    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<TargetLibraryInfo>();
    }
    static char ID; // Pass identification, replacement for typeid
    GlobalOpt() : ModulePass(ID) {
      initializeGlobalOptPass(*PassRegistry::getPassRegistry());
    }

    bool runOnModule(Module &M) override;

  private:
    bool OptimizeFunctions(Module &M);
    bool OptimizeGlobalVars(Module &M);
    bool OptimizeGlobalAliases(Module &M);
    bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
    bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
                               const GlobalStatus &GS);
    bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);

    const DataLayout *DL;
    TargetLibraryInfo *TLI;
  };
}

char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
                "Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
                "Global Variable Optimizer", false, false)

ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }

/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
/// as a root?  If so, we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
  // A global variable is a root if it is a pointer, or could plausibly contain
  // a pointer.  There are two challenges; one is that we could have a struct
  // that has an inner member which is a pointer.  We recurse through the type
  // to detect these (up to a point).  The other is that we may actually be a
  // union of a pointer and another type, and so our LLVM type is an integer
  // which gets converted into a pointer, or our type is an [i8 x #] with a
  // pointer potentially contained here.
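  //
  // Illustrative sketch (assumed IR, not from the original source): a global
  // such as
  //   @buf = internal global [16 x i8] zeroinitializer
  // can hold a pointer written through a cast, so it still counts as a
  // potential root even though no pointer type is syntactically visible.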

  if (GV->hasPrivateLinkage())
    return false;

  SmallVector<Type *, 4> Types;
  Types.push_back(cast<PointerType>(GV->getType())->getElementType());

  unsigned Limit = 20;
  do {
    Type *Ty = Types.pop_back_val();
    switch (Ty->getTypeID()) {
      default: break;
      case Type::PointerTyID: return true;
      case Type::ArrayTyID:
      case Type::VectorTyID: {
        SequentialType *STy = cast<SequentialType>(Ty);
        Types.push_back(STy->getElementType());
        break;
      }
      case Type::StructTyID: {
        StructType *STy = cast<StructType>(Ty);
        if (STy->isOpaque()) return true;
        for (StructType::element_iterator I = STy->element_begin(),
                 E = STy->element_end(); I != E; ++I) {
          Type *InnerTy = *I;
          if (isa<PointerType>(InnerTy)) return true;
          if (isa<CompositeType>(InnerTy))
            Types.push_back(InnerTy);
        }
        break;
      }
    }
    if (--Limit == 0) return true;
  } while (!Types.empty());
  return false;
}

/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
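///
/// Illustrative sketch (assumed IR, not from this file): in
///   %p = call i8* @malloc(i64 8)
///   %q = getelementptr i8* %p, i64 4    ; sole use of %p
///   store i8* %q, i8** @g               ; the store being considered
/// the whole chain %q, %p can be deleted together with the store.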
static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
  do {
    if (isa<Constant>(V))
      return true;
    if (!V->hasOneUse())
      return false;
    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
        isa<GlobalValue>(V))
      return false;
    if (isAllocationFn(V, TLI))
      return true;

    Instruction *I = cast<Instruction>(V);
    if (I->mayHaveSideEffects())
      return false;
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      if (!GEP->hasAllConstantIndices())
        return false;
    } else if (I->getNumOperands() != 1) {
      return false;
    }

    V = I->getOperand(0);
  } while (1);
}

/// CleanupPointerRootUsers - This GV is a pointer root.  Loop over all users
/// of the global and clean up any that obviously assign the global a value
/// that is not dynamically allocated.
///
static bool CleanupPointerRootUsers(GlobalVariable *GV,
                                    const TargetLibraryInfo *TLI) {
  // A brief explanation of leak checkers.  The goal is to find bugs where
  // pointers are forgotten, causing an accumulating growth in memory
  // usage over time.  The common strategy for leak checkers is to whitelist the
  // memory pointed to by globals at exit.  This is popular because it also
  // solves another problem where the main thread of a C++ program may shut down
  // before other threads that are still expecting to use those globals.  To
  // handle that case, we expect the program may create a singleton and never
  // destroy it.

  bool Changed = false;

  // If Dead[n].first is the only use of a malloc result, we can delete its
  // chain of computation and the store to the global in Dead[n].second.
  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;

  // Constants can't be pointers to dynamically allocated memory.
  for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
       UI != E;) {
    User *U = *UI++;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      Value *V = SI->getValueOperand();
      if (isa<Constant>(V)) {
        Changed = true;
        SI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, SI));
      }
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
      if (isa<Constant>(MSI->getValue())) {
        Changed = true;
        MSI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MSI));
      }
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
      if (MemSrc && MemSrc->isConstant()) {
        Changed = true;
        MTI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MTI));
      }
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (Constant *C = dyn_cast<Constant>(U)) {
      if (isSafeToDestroyConstant(C)) {
        C->destroyConstant();
        // This could have invalidated UI, start over from scratch.
        Dead.clear();
        CleanupPointerRootUsers(GV, TLI);
        return true;
      }
    }
  }

  for (int i = 0, e = Dead.size(); i != e; ++i) {
    if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
      Dead[i].second->eraseFromParent();
      Instruction *I = Dead[i].first;
      do {
        if (isAllocationFn(I, TLI))
          break;
        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
        if (!J)
          break;
        I->eraseFromParent();
        I = J;
      } while (1);
      I->eraseFromParent();
    }
  }

  return Changed;
}

/// CleanupConstantGlobalUsers - We just marked GV constant.  Loop over all
/// users of the global, cleaning up the obvious ones.  This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft.  This
/// returns true if it made a change.
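///
/// Illustrative sketch (assumed IR, not from this file): once @g is known to
/// be constant with initializer i32 42,
///   %v = load i32* @g
/// is replaced by the literal 42, and any dead store to @g is deleted.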
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
                                       const DataLayout *DL,
                                       TargetLibraryInfo *TLI) {
  bool Changed = false;
  // Note that we need to use a weak value handle for the worklist items. When
  // we delete a constant array, we may also be holding pointer to one of its
  // elements (or an element of one of its elements if we're dealing with an
  // array of arrays) in the worklist.
  SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end());
  while (!WorkList.empty()) {
    Value *UV = WorkList.pop_back_val();
    if (!UV)
      continue;

    User *U = cast<User>(UV);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (Init) {
        // Replace the load with the initializer.
        LI->replaceAllUsesWith(Init);
        LI->eraseFromParent();
        Changed = true;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // Store must be unreachable or storing Init into the global.
      SI->eraseFromParent();
      Changed = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->getOpcode() == Instruction::GetElementPtr) {
        Constant *SubInit = nullptr;
        if (Init)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
        Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
      } else if ((CE->getOpcode() == Instruction::BitCast &&
                  CE->getType()->isPointerTy()) ||
                 CE->getOpcode() == Instruction::AddrSpaceCast) {
        // Pointer cast, delete any stores and memsets to the global.
        Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI);
      }

      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // Do not transform "gepinst (gep constexpr (GV))" here, because forming
      // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
      // and will invalidate our notion of what Init is.
      Constant *SubInit = nullptr;
      if (!isa<ConstantExpr>(GEP->getOperand(0))) {
        ConstantExpr *CE =
          dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI));
        if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);

        // If the initializer is an all-null value and we have an inbounds GEP,
        // we already know what the result of any load from that GEP is.
        // TODO: Handle splats.
        if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
          SubInit = Constant::getNullValue(GEP->getType()->getElementType());
      }
      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI);

      if (GEP->use_empty()) {
        GEP->eraseFromParent();
        Changed = true;
      }
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
      if (MI->getRawDest() == V) {
        MI->eraseFromParent();
        Changed = true;
      }

    } else if (Constant *C = dyn_cast<Constant>(U)) {
      // If we have a chain of dead constantexprs or other things dangling from
      // us, and if they are all dead, nuke them without remorse.
      if (isSafeToDestroyConstant(C)) {
        C->destroyConstant();
        CleanupConstantGlobalUsers(V, Init, DL, TLI);
        return true;
      }
    }
  }
  return Changed;
}

/// isSafeSROAElementUse - Return true if the specified instruction is a safe
/// user of a derived expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
  // We might have a dead and dangling constant hanging off of here.
  if (Constant *C = dyn_cast<Constant>(V))
    return isSafeToDestroyConstant(C);

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Loads are ok.
  if (isa<LoadInst>(I)) return true;

  // Stores *to* the pointer are ok.
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getOperand(0) != V;

  // Otherwise, it must be a GEP.
  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
  if (!GEPI) return false;

  if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
      !cast<Constant>(GEPI->getOperand(1))->isNullValue())
    return false;

  for (User *U : GEPI->users())
    if (!isSafeSROAElementUse(U))
      return false;
  return true;
}


/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
/// Look at it and its uses and decide whether it is safe to SROA this global.
///
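/// Illustrative sketch (assumed IR): 'getelementptr @g, i32 0, i32 2' with
/// all-constant, in-range indices is safe, while
/// 'getelementptr @g, i32 0, i32 %i' with a runtime index is not.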
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
  if (!isa<GetElementPtrInst>(U) &&
      (!isa<ConstantExpr>(U) ||
       cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
    return false;

  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
  // don't like < 3 operand CE's, and we don't like non-constant integer
  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
  // value of C.
  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
      !cast<Constant>(U->getOperand(1))->isNullValue() ||
      !isa<ConstantInt>(U->getOperand(2)))
    return false;

  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
  ++GEPI;  // Skip over the pointer index.

  // If this is a use of an array allocation, do a bit more checking for sanity.
  if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
    uint64_t NumElements = AT->getNumElements();
    ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));

    // Check to make sure that index falls within the array.  If not,
    // something funny is going on, so we won't do the optimization.
    //
    if (Idx->getZExtValue() >= NumElements)
      return false;

    // We cannot scalar repl this level of the array unless any array
    // sub-indices are in-range constants.  In particular, consider:
    // A[0][i].  We cannot know that the user isn't doing invalid things like
    // allowing i to index an out-of-range subscript that accesses A[1].
    //
    // Scalar replacing *just* the outer index of the array is probably not
    // going to be a win anyway, so just give up.
    for (++GEPI; // Skip array index.
         GEPI != E;
         ++GEPI) {
      uint64_t NumElements;
      if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
        NumElements = SubArrayTy->getNumElements();
      else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
        NumElements = SubVectorTy->getNumElements();
      else {
        assert((*GEPI)->isStructTy() &&
               "Indexed GEP type is not array, vector, or struct!");
        continue;
      }

      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
      if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
        return false;
    }
  }

  for (User *UU : U->users())
    if (!isSafeSROAElementUse(UU))
      return false;

  return true;
}

/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
/// is safe for us to perform this transformation.
///
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
  for (User *U : GV->users())
    if (!IsUserOfGlobalSafeForSRA(U, GV))
      return false;

  return true;
}


/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
/// variable.  This opens the door for other optimizations by exposing the
/// behavior of the program in a more fine-grained way.  We have determined that
/// this transformation is safe already.  We return the first global variable we
/// insert so that the caller can reprocess it.
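///
/// Illustrative before/after sketch (assumed IR, not from this file):
///   @g = internal global { i32, i8* } { i32 1, i8* null }
/// becomes
///   @g.0 = internal global i32 1
///   @g.1 = internal global i8* null
/// and each 'gep @g, 0, N, ...' is rewritten to address @g.N directly.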
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
  // Make sure this global only has simple uses that we can SRA.
  if (!GlobalUsersSafeToSRA(GV))
    return nullptr;

  assert(GV->hasLocalLinkage() && !GV->isConstant());
  Constant *Init = GV->getInitializer();
  Type *Ty = Init->getType();

  std::vector<GlobalVariable*> NewGlobals;
  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();

  // Get the alignment of the global, either explicit or target-specific.
  unsigned StartAlignment = GV->getAlignment();
  if (StartAlignment == 0)
    StartAlignment = DL.getABITypeAlignment(GV->getType());

  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    NewGlobals.reserve(STy->getNumElements());
    const StructLayout &Layout = *DL.getStructLayout(STy);
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");
      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->getThreadLocalMode(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      uint64_t FieldOffset = Layout.getElementOffset(i);
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
      if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i)))
        NGV->setAlignment(NewAlign);
    }
  } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    unsigned NumElements = 0;
    if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
      NumElements = ATy->getNumElements();
    else
      NumElements = cast<VectorType>(STy)->getNumElements();

    if (NumElements > 16 && GV->hasNUsesOrMore(16))
      return nullptr; // It's not worth it.
    NewGlobals.reserve(NumElements);

    uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType());
    unsigned EltAlign = DL.getABITypeAlignment(STy->getElementType());
    for (unsigned i = 0, e = NumElements; i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");

      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->getThreadLocalMode(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
      if (NewAlign > EltAlign)
        NGV->setAlignment(NewAlign);
    }
  }

  if (NewGlobals.empty())
    return nullptr;

  DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);

  Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));

  // Loop over all of the uses of the global, replacing the constantexpr geps,
  // with smaller constantexpr geps or direct references.
  while (!GV->use_empty()) {
    User *GEP = GV->user_back();
    assert(((isa<ConstantExpr>(GEP) &&
             cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");

    // Ignore operand 1, which has to be zero or else the program is quite
    // broken (undefined).  Get operand 2, which is the structure or array
    // index.
    unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
    if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.

    Value *NewPtr = NewGlobals[Val];

    // Form a shorter GEP if needed.
    if (GEP->getNumOperands() > 3) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
        SmallVector<Constant*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
          Idxs.push_back(CE->getOperand(i));
        NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
      } else {
        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
        SmallVector<Value*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
          Idxs.push_back(GEPI->getOperand(i));
        NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
                                           GEPI->getName()+"."+Twine(Val),GEPI);
      }
    }
    GEP->replaceAllUsesWith(NewPtr);

    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
      GEPI->eraseFromParent();
    else
      cast<ConstantExpr>(GEP)->destroyConstant();
  }

  // Delete the old global, now that it is dead.
  Globals.erase(GV);
  ++NumSRA;

  // Loop over the new globals array deleting any globals that are obviously
  // dead.  This can arise due to scalarization of a structure or an array that
  // has elements that are dead.
  unsigned FirstGlobal = 0;
  for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
    if (NewGlobals[i]->use_empty()) {
      Globals.erase(NewGlobals[i]);
      if (FirstGlobal == i) ++FirstGlobal;
    }

  return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
}

/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
/// value will trap if the value is dynamically null.  PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
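///
/// Illustrative sketch (assumed IR): if every use of %p looks like
///   %v = load i32* %p            ; traps when %p == null
///   store i32 0, i32* %p         ; likewise
/// then a dynamically null %p could never reach a well-defined use.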
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
  for (const User *U : V->users())
    if (isa<LoadInst>(U)) {
      // Will trap.
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Storing the value.
      }
    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
      if (CI->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
      if (II->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
      // If we've already seen this phi node, ignore it, it has already been
      // checked.
      if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
        return false;
    } else if (isa<ICmpInst>(U) &&
               isa<ConstantPointerNull>(U->getOperand(1))) {
      // Ignore icmp X, null
    } else {
      //cerr << "NONTRAPPING USE: " << *U;
      return false;
    }

  return true;
}

/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
/// from GV will trap if the loaded value is null.  Note that this also permits
/// comparisons of the loaded value against null, as a special case.
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
  for (const User *U : GV->users())
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      SmallPtrSet<const PHINode*, 8> PHIs;
      if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
        return false;
    } else if (isa<StoreInst>(U)) {
      // Ignore stores to the global.
    } else {
      // We don't know or understand this user, bail out.
      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
      return false;
    }
  return true;
}

static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  bool Changed = false;
  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallSite CS(I);
      if (CS.getCalledValue() == V) {
        // Calling through the pointer!  Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        CS.setCalledFunction(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
          if (CS.getArgument(i) == V) {
            PassedAsArg = true;
            CS.setArgument(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also.  Be careful to not invalidate UI!
          UI = V->user_begin();
        }
      }
    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
                                ConstantExpr::getCast(CI->getOpcode(),
                                                      NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant*, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
                          ConstantExpr::getGetElementPtr(NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}

/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
/// value stored into it.  If there are uses of the loaded value that would trap
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null value, so we can optimize away the load.
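///
/// Illustrative sketch (assumed IR): given the single store
///   store i32* %buf, i32** @g
/// a load '%p = load i32** @g' whose uses all trap on null can have those
/// uses rewritten to use %buf, since @g never holds another non-null value.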
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
                                            const DataLayout *DL,
                                            TargetLibraryInfo *TLI) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of uses of the stored value.
  for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end();
       GUI != E;) {
    User *GlobalUser = *GUI++;
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
              isa<BitCastInst>(GlobalUser) ||
              isa<GetElementPtrInst>(GlobalUser)) &&
773             "Only expect load and stores!");
    }
  }

  if (Changed) {
    DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    if (isLeakCheckerRoot(GV)) {
      Changed |= CleanupPointerRootUsers(GV, TLI);
    } else {
      Changed = true;
      CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
    }
    if (GV->use_empty()) {
      DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
      Changed = true;
      GV->eraseFromParent();
      ++NumDeleted;
    }
  }
  return Changed;
}

/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout *DL,
                                TargetLibraryInfo *TLI) {
  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        I->eraseFromParent();
      }
}

/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
/// variable, and transforms the program as if it always contained the result of
/// the specified malloc.  Because it is always the result of the specified
/// malloc, there is no reason to actually DO the malloc.  Instead, turn the
/// malloc into a global, and rewrite any loads of GV as uses of the new global.
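///
/// Illustrative before/after sketch (assumed IR, not from this file):
///   @g = internal global i32* null       ; only ever assigned malloc(4)
/// becomes
///   @g.body = internal global i32 undef
/// with loads of @g replaced by @g.body, plus an @g.init bool when the
/// pointer is compared against null.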
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                                     CallInst *CI,
                                                     Type *AllocTy,
                                                     ConstantInt *NElements,
                                                     const DataLayout *DL,
                                                     TargetLibraryInfo *TLI) {
  DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');

  Type *GlobalType;
  if (NElements->getZExtValue() == 1)
    GlobalType = AllocTy;
  else
    // If we have an array allocation, the global variable is of an array.
    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());

  // Create the new global variable.  The contents of the malloc'd memory is
  // undefined, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
                                             GlobalType, false,
                                             GlobalValue::InternalLinkage,
                                             UndefValue::get(GlobalType),
                                             GV->getName()+".body",
                                             GV,
                                             GV->getThreadLocalMode());

  // If there are bitcast users of the malloc (which is typical, usually we have
  // a malloc + bitcast) then replace them with uses of the new global.  Update
  // other users to use the global as well.
  BitCastInst *TheBC = nullptr;
  while (!CI->use_empty()) {
    Instruction *User = cast<Instruction>(CI->user_back());
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (BCI->getType() == NewGV->getType()) {
        BCI->replaceAllUsesWith(NewGV);
        BCI->eraseFromParent();
      } else {
        BCI->setOperand(0, NewGV);
      }
    } else {
      if (!TheBC)
        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
      User->replaceUsesOfWith(CI, TheBC);
    }
  }

  Constant *RepValue = NewGV;
  if (NewGV->getType() != GV->getType()->getElementType())
    RepValue = ConstantExpr::getBitCast(RepValue,
                                        GV->getType()->getElementType());

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool =
    new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
                       GlobalValue::InternalLinkage,
                       ConstantInt::getFalse(GV->getContext()),
                       GV->getName()+".init", GV->getThreadLocalMode());
  bool InitBoolUsed = false;

  // Loop over all uses of GV, processing them in turn.
  while (!GV->use_empty()) {
    if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
      // The global is initialized when the store to it occurs.
      new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
                    SI->getOrdering(), SI->getSynchScope(), SI);
      SI->eraseFromParent();
      continue;
    }

    LoadInst *LI = cast<LoadInst>(GV->user_back());
    while (!LI->use_empty()) {
      Use &LoadUse = *LI->use_begin();
      ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
      if (!ICI) {
        LoadUse = RepValue;
        continue;
      }

      // Replace the cmp X, 0 with a use of the bool value.
      // Sink the load to where the compare was, if atomic rules allow us to.
      Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
                               LI->getOrdering(), LI->getSynchScope(),
                               LI->isUnordered() ? (Instruction*)ICI : LI);
      InitBoolUsed = true;
      switch (ICI->getPredicate()) {
      default: llvm_unreachable("Unknown ICmp Predicate!");
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_SLT:   // X < null -> always false
        LV = ConstantInt::getFalse(GV->getContext());
        break;
      case ICmpInst::ICMP_ULE:
      case ICmpInst::ICMP_SLE:
      case ICmpInst::ICMP_EQ:
        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
        break;
      case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_UGE:
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_UGT:
      case ICmpInst::ICMP_SGT:
        break;  // no change.
      }
      ICI->replaceAllUsesWith(LV);
      ICI->eraseFromParent();
    }
    LI->eraseFromParent();
  }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty())  // Delete initializations
      cast<StoreInst>(InitBool->user_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->getGlobalList().insert(GV, InitBool);

  // Now the GV is dead, nuke it and the malloc.
  GV->eraseFromParent();
  CI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
  // constant prop them.  This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  ConstantPropUsersOf(NewGV, DL, TLI);
  if (RepValue != NewGV)
    ConstantPropUsersOf(RepValue, DL, TLI);

  return NewGV;
}

/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
/// to make sure that there are no complex uses of V.  We permit simple things
/// like dereferencing the pointer, but not storing through the address, unless
/// it is to the specified global.
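///
/// Illustrative sketch: for %m = malloc(...), loads of %m, compares, GEPs,
/// and 'store %m, @GV' are all acceptable, but storing %m anywhere else would
/// let the pointer escape and is rejected.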
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
                                                      const GlobalVariable *GV,
                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
  for (const User *U : V->users()) {
    const Instruction *Inst = cast<Instruction>(U);

    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
      continue; // Fine, ignore.
    }

    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
        return false;  // Storing the pointer itself... bad.
      continue; // Otherwise, storing through it, or storing into GV... fine.
    }

    // Must index into the array and into the struct.
    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
        return false;
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
      // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
      // cycles.
      if (PHIs.insert(PN))
        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
          return false;
      continue;
    }

    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
        return false;
      continue;
    }

    return false;
  }
  return true;
}

/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
/// somewhere.  Transform all uses of the allocation into loads from the
/// global and uses of the resultant pointer.  Further, delete the store into
/// GV.  This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
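///
/// Illustrative sketch: each remaining use of the malloc'd pointer is
/// rewritten to use a fresh 'load GV' inserted at the use (or, for a PHI
/// use, at the end of the corresponding predecessor block).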
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
                                          GlobalVariable *GV) {
  while (!Alloc->use_empty()) {
    Instruction *U = cast<Instruction>(*Alloc->user_begin());
    Instruction *InsertPt = U;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // If this is the store of the allocation into the global, remove it.
      if (SI->getOperand(1) == GV) {
        SI->eraseFromParent();
        continue;
      }
    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
      // Insert the load in the corresponding predecessor, not right before the
      // PHI.
      InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator();
    } else if (isa<BitCastInst>(U)) {
      // Must be bitcast between the malloc and store to initialize the global.
      ReplaceUsesOfMallocWithGlobal(U, GV);
      U->eraseFromParent();
      continue;
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // If this is a "GEP bitcast" and the user is a store to the global, then
      // just process it as a bitcast.
      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back()))
          if (SI->getOperand(1) == GV) {
            // Must be bitcast GEP between the malloc and store to initialize
            // the global.
            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
            GEPI->eraseFromParent();
            continue;
          }
    }

    // Insert a load from the global, and use it instead of the malloc.
    Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
    U->replaceUsesOfWith(Alloc, NL);
  }
}

/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
/// of a load) are simple enough to perform heap SRA on.  This permits GEP's
/// that index through the array and struct field, icmps of null, and PHIs.
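///
/// Illustrative sketch (assumed IR): for %p = load %struct.T** @g, uses like
///   icmp eq %struct.T* %p, null
///   getelementptr %struct.T* %p, i32 %i, i32 2
/// qualify; any other kind of use rejects heap SRA.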
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
  // We permit two users of the load: setcc comparing against the null
  // pointer, and a getelementptr of a specific form.
  for (const User *U : V->users()) {
    const Instruction *UI = cast<Instruction>(U);

    // Comparison against null is ok.
    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
        return false;
      continue;
    }

    // getelementptr is also ok, but only a simple form.
    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
      // Must index into the array and into the struct.
      if (GEPI->getNumOperands() < 3)
        return false;

      // Otherwise the GEP is ok.
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
      if (!LoadUsingPHIsPerLoad.insert(PN))
        // This means some phi nodes are dependent on each other.
        // Avoid infinite looping!
        return false;
      if (!LoadUsingPHIs.insert(PN))
        // If we have already analyzed this PHI, then it is safe.
        continue;

      // Make sure all uses of the PHI are simple enough to transform.
      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
        return false;

      continue;
    }

    // Otherwise we don't know what this is, not ok.
    return false;
  }

  return true;
}


/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
/// GV are simple enough to perform HeapSRA, return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
                                                    Instruction *StoredVal) {
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
  for (const User *U : GV->users())
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
                                          LoadUsingPHIsPerLoad))
        return false;
      LoadUsingPHIsPerLoad.clear();
    }

  // If we reach here, we know that all uses of the loads and transitive uses
  // (through PHI nodes) are simple enough to transform.  However, we don't know
  // that all inputs to the PHI nodes are in the same equivalence sets.
  // Check to verify that all operands of the PHIs are either PHIs that can be
  // transformed, loads from GV, or the stored value itself.
  for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
       , E = LoadUsingPHIs.end(); I != E; ++I) {
    const PHINode *PN = *I;
    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
      Value *InVal = PN->getIncomingValue(op);

      // PHI of the stored value itself is ok.
      if (InVal == StoredVal) continue;

      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
        // One of the PHIs in our set is (optimistically) ok.
        if (LoadUsingPHIs.count(InPN))
          continue;
        return false;
      }

      // Load from GV is ok.
      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
        if (LI->getOperand(0) == GV)
          continue;

      // UNDEF? NULL?

      // Anything else is rejected.
      return false;
    }
  }

  return true;
}

static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];

  if (FieldNo >= FieldVals.size())
    FieldVals.resize(FieldNo+1);

  // If we already have this value, just reuse the previously scalarized
  // version.
  if (Value *FieldVal = FieldVals[FieldNo])
    return FieldVal;

  // Depending on what instruction this is, we have several cases.
  Value *Result;
  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
    // This is a scalarized version of the load from the global.  Just create
    // a new Load of the scalarized global.
    Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
                                           InsertedScalarizedValues,
                                           PHIsToRewrite),
                          LI->getName()+".f"+Twine(FieldNo), LI);
  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
    // PN's type is pointer to struct.  Make a new PHI of pointer to struct
    // field.

    PointerType *PTy = cast<PointerType>(PN->getType());
    StructType *ST = cast<StructType>(PTy->getElementType());

    unsigned AS = PTy->getAddressSpace();
    PHINode *NewPN =
      PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
                     PN->getNumIncomingValues(),
                     PN->getName()+".f"+Twine(FieldNo), PN);
    Result = NewPN;
    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
  } else {
    llvm_unreachable("Unknown usable value");
  }

  return FieldVals[FieldNo] = Result;
}

/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
/// the load, rewrite the derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
             DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  // If this is a comparison against null, handle it.
  if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
    assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
    // If we have a setcc of the loaded pointer, we can use a setcc of any
    // field.
    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
                                   InsertedScalarizedValues, PHIsToRewrite);

    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
                              Constant::getNullValue(NPtr->getType()),
                              SCI->getName());
    SCI->replaceAllUsesWith(New);
    SCI->eraseFromParent();
    return;
  }

  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
    assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
           && "Unexpected GEPI!");

    // Load the pointer for this field.
    unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
                                     InsertedScalarizedValues, PHIsToRewrite);

    // Create the new GEP idx vector.
    SmallVector<Value*, 8> GEPIdx;
    GEPIdx.push_back(GEPI->getOperand(1));
    GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());

    Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
                                             GEPI->getName(), GEPI);
    GEPI->replaceAllUsesWith(NGEPI);
    GEPI->eraseFromParent();
    return;
  }

  // Recursively transform the users of PHI nodes.  This will lazily create the
  // PHIs that are needed for individual elements.  Keep track of what PHIs we
  // see in InsertedScalarizedValues so that we don't get infinite loops (very
  // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
  // already been seen first by another load, so its uses have already been
  // processed.
  PHINode *PN = cast<PHINode>(LoadUser);
  if (!InsertedScalarizedValues.insert(std::make_pair(PN,
                                              std::vector<Value*>())).second)
    return;

  // If this is the first time we've seen this PHI, recursively process all
  // users.
  for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }
}

/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
/// is a value loaded from the global.  Eliminate all uses of Ptr, making them
/// use FieldGlobals instead.  All uses of loaded values satisfy
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }

  if (Load->use_empty()) {
    Load->eraseFromParent();
    InsertedScalarizedValues.erase(Load);
  }
}

/// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
/// it up into multiple allocations of arrays of the fields.
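///
/// Illustrative before/after sketch (assumed IR, not from this file): a
/// 'malloc(sizeof({i32, i8*}) * n)' stored to @g becomes two mallocs of
/// 'i32 * n' and 'i8* * n' stored to new globals @g.f0 and @g.f1, and a
/// 'gep %p, i, 1' on the loaded pointer becomes a gep on a load of @g.f1.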
1273static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
1274                                            Value *NElems, const DataLayout *DL,
1275                                            const TargetLibraryInfo *TLI) {
1276  DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
1277  Type *MAT = getMallocAllocatedType(CI, TLI);
1278  StructType *STy = cast<StructType>(MAT);
1279
1280  // There is guaranteed to be at least one use of the malloc (storing
1281  // it into GV).  If there are other uses, change them to be uses of
1282  // the global to simplify later code.  This also deletes the store
1283  // into GV.
1284  ReplaceUsesOfMallocWithGlobal(CI, GV);
1285
1286  // Okay, at this point, there are no users of the malloc.  Insert N
1287  // new mallocs at the same place as CI, and N globals.
1288  std::vector<Value*> FieldGlobals;
1289  std::vector<Value*> FieldMallocs;
1290
1291  unsigned AS = GV->getType()->getPointerAddressSpace();
1292  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
1293    Type *FieldTy = STy->getElementType(FieldNo);
1294    PointerType *PFieldTy = PointerType::get(FieldTy, AS);
1295
1296    GlobalVariable *NGV =
1297      new GlobalVariable(*GV->getParent(),
1298                         PFieldTy, false, GlobalValue::InternalLinkage,
1299                         Constant::getNullValue(PFieldTy),
1300                         GV->getName() + ".f" + Twine(FieldNo), GV,
1301                         GV->getThreadLocalMode());
1302    FieldGlobals.push_back(NGV);
1303
1304    unsigned TypeSize = DL->getTypeAllocSize(FieldTy);
1305    if (StructType *ST = dyn_cast<StructType>(FieldTy))
1306      TypeSize = DL->getStructLayout(ST)->getSizeInBytes();
1307    Type *IntPtrTy = DL->getIntPtrType(CI->getType());
1308    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
1309                                        ConstantInt::get(IntPtrTy, TypeSize),
1310                                        NElems, nullptr,
1311                                        CI->getName() + ".f" + Twine(FieldNo));
1312    FieldMallocs.push_back(NMI);
1313    new StoreInst(NMI, NGV, CI);
1314  }
1315
1316  // The tricky aspect of this transformation is handling the case when malloc
1317  // fails.  In the original code, malloc failing would set the result pointer
1318  // of malloc to null.  In this case, some mallocs could succeed and others
1319  // could fail.  As such, we emit code that looks like this:
1320  //    F0 = malloc(field0)
1321  //    F1 = malloc(field1)
1322  //    F2 = malloc(field2)
1323  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
1324  //      if (F0) { free(F0); F0 = 0; }
1325  //      if (F1) { free(F1); F1 = 0; }
1326  //      if (F2) { free(F2); F2 = 0; }
1327  //    }
1328  // The malloc can also fail if its argument is too large; the 'isneg'
  // check below treats a negative size request as a failed allocation.
1329  Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
1330  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
1331                                  ConstantZero, "isneg");
1332  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
1333    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
1334                             Constant::getNullValue(FieldMallocs[i]->getType()),
1335                               "isnull");
1336    RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
1337  }
1338
1339  // Split the basic block at the old malloc.
1340  BasicBlock *OrigBB = CI->getParent();
1341  BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
1342
1343  // Create the block to check the first condition.  Put all these blocks at the
1344  // end of the function as they are unlikely to be executed.
1345  BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
1346                                                "malloc_ret_null",
1347                                                OrigBB->getParent());
1348
1349  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
1350  // branch on RunningOr.
1351  OrigBB->getTerminator()->eraseFromParent();
1352  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
1353
1354  // Within the NullPtrBlock, we need to emit a comparison and branch for each
1355  // pointer, because some may be null while others are not.
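  // For example (an illustrative sketch of the emitted blocks, using the
  // names created below), with two field globals the chain looks like:
  //   malloc_ret_null:
  //     %f0 = load F0;  br (%f0 != null), free_it, next
  //   free_it:
  //     free(%f0);  store null, F0;  br next
  //   next:
  //     ... same test for the second field ...;  br malloc_cont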
1356  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1357    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
1358    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
1359                              Constant::getNullValue(GVVal->getType()));
1360    BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
1361                                               OrigBB->getParent());
1362    BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
1363                                               OrigBB->getParent());
1364    Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
1365                                         Cmp, NullPtrBlock);
1366
1367    // Fill in FreeBlock.
1368    CallInst::CreateFree(GVVal, BI);
1369    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
1370                  FreeBlock);
1371    BranchInst::Create(NextBlock, FreeBlock);
1372
1373    NullPtrBlock = NextBlock;
1374  }
1375
1376  BranchInst::Create(ContBB, NullPtrBlock);
1377
1378  // CI is no longer needed, remove it.
1379  CI->eraseFromParent();
1380
1381  // InsertedScalarizedValues - As we process loads, if we can't immediately
1382  // update all uses of a load, keep track of the scalarized values inserted
1383  // for that load.
1384  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
1385  InsertedScalarizedValues[GV] = FieldGlobals;
1386
1387  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
1388
1389  // Okay, the malloc site is completely handled.  All of the uses of GV are now
1390  // loads, and all uses of those loads are simple.  Rewrite them to use loads
1391  // of the per-field globals instead.
1392  for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) {
1393    Instruction *User = cast<Instruction>(*UI++);
1394
1395    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1396      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
1397      continue;
1398    }
1399
1400    // Must be a store of null.
1401    StoreInst *SI = cast<StoreInst>(User);
1402    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
1403           "Unexpected heap-sra user!");
1404
1405    // Insert a store of null into each global.
1406    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1407      PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
1408      Constant *Null = Constant::getNullValue(PT->getElementType());
1409      new StoreInst(Null, FieldGlobals[i], SI);
1410    }
1411    // Erase the original store.
1412    SI->eraseFromParent();
1413  }
1414
1415  // While we have PHIs that are interesting to rewrite, do it.
1416  while (!PHIsToRewrite.empty()) {
1417    PHINode *PN = PHIsToRewrite.back().first;
1418    unsigned FieldNo = PHIsToRewrite.back().second;
1419    PHIsToRewrite.pop_back();
1420    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
1421    assert(FieldPN->getNumIncomingValues() == 0 &&
           "Already processed this phi");
1422
1423    // Add all the incoming values.  This can materialize more phis.
1424    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1425      Value *InVal = PN->getIncomingValue(i);
1426      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
1427                               PHIsToRewrite);
1428      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
1429    }
1430  }
1431
1432  // Drop all inter-phi links and any loads that made it this far.
1433  for (DenseMap<Value*, std::vector<Value*> >::iterator
1434       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1435       I != E; ++I) {
1436    if (PHINode *PN = dyn_cast<PHINode>(I->first))
1437      PN->dropAllReferences();
1438    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1439      LI->dropAllReferences();
1440  }
1441
1442  // Delete all the phis and loads now that inter-references are dead.
1443  for (DenseMap<Value*, std::vector<Value*> >::iterator
1444       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1445       I != E; ++I) {
1446    if (PHINode *PN = dyn_cast<PHINode>(I->first))
1447      PN->eraseFromParent();
1448    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1449      LI->eraseFromParent();
1450  }
1451
1452  // The old global is now dead, remove it.
1453  GV->eraseFromParent();
1454
1455  ++NumHeapSRA;
1456  return cast<GlobalVariable>(FieldGlobals[0]);
1457}
1458
1459/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
1460/// pointer global variable with a single value stored into it, where that
1461/// value is a malloc or a cast of a malloc.
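///
/// For example (an illustrative sketch):
///   @G = internal global %struct.T* null
///   ...
///   %mem = call i8* @malloc(i64 %size)
///   %t = bitcast i8* %mem to %struct.T*
///   store %struct.T* %t, %struct.T** @G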
1462static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
1463                                               CallInst *CI,
1464                                               Type *AllocTy,
1465                                               AtomicOrdering Ordering,
1466                                               Module::global_iterator &GVI,
1467                                               const DataLayout *DL,
1468                                               TargetLibraryInfo *TLI) {
1469  if (!DL)
1470    return false;
1471
1472  // If this is a malloc of an abstract type, don't touch it.
1473  if (!AllocTy->isSized())
1474    return false;
1475
1476  // We can't optimize this global unless all uses of it are *known* to be
1477  // of the malloc value, not of the null initializer value (consider a use
1478  // that compares the global's value against zero to see if the malloc has
1479  // been reached).  To do this, we check to see if all uses of the global
1480  // would trap if the global were null: this proves that they must all
1481  // happen after the malloc.
1482  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
1483    return false;
1484
1485  // We can't optimize this if the malloc itself is used in a complex way,
1486  // for example, being stored into multiple globals.  This check allows
1487  // the malloc to be stored into the specified global, loaded, icmp'd,
1488  // and GEP'd.  These are all things we can transform to use the global
1489  // instead.
1490  SmallPtrSet<const PHINode*, 8> PHIs;
1491  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
1492    return false;
1493
1494  // If we have a global that is only initialized with a fixed size malloc,
1495  // transform the program to use global memory instead of malloc'd memory.
1496  // This eliminates dynamic allocation, avoids an indirection accessing the
1497  // data, and exposes the resultant global to further GlobalOpt.
1498  // We cannot optimize the malloc if we cannot determine the array size.
1499  Value *NElems = getMallocArraySize(CI, DL, TLI, true);
1500  if (!NElems)
1501    return false;
1502
1503  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
1504    // Restrict this transformation to small allocations (currently under
1505    // 2048 bytes), as we don't want to introduce an enormous global in
1506    // its place.
1507    if (NElements->getZExtValue() * DL->getTypeAllocSize(AllocTy) < 2048) {
1508      GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
1509      return true;
1510    }
1511
1512  // If the allocation is an array of structures, consider transforming this
1513  // into multiple malloc'd arrays, one for each field.  This is basically
1514  // SRoA for malloc'd memory.
1515
1516  if (Ordering != NotAtomic)
1517    return false;
1518
1519  // If this is an allocation of a fixed size array of structs, analyze as a
1520  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
1521  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
1522    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
1523      AllocTy = AT->getElementType();
1524
1525  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
1526  if (!AllocSTy)
1527    return false;
1528
1529  // If the structure has an unreasonable number of fields, leave it
1530  // alone.
1531  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
1532      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
1533
1534    // If this is a fixed size array, transform the malloc into an
1535    // allocation of structs.  malloc [100 x struct],1 -> malloc struct, 100
1536    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
1537      Type *IntPtrTy = DL->getIntPtrType(CI->getType());
1538      unsigned TypeSize = DL->getStructLayout(AllocSTy)->getSizeInBytes();
1539      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
1540      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
1541      Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
1542                                                   AllocSize, NumElements,
1543                                                   nullptr, CI->getName());
1544      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
1545      CI->replaceAllUsesWith(Cast);
1546      CI->eraseFromParent();
1547      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
1548        CI = cast<CallInst>(BCI->getOperand(0));
1549      else
1550        CI = cast<CallInst>(Malloc);
1551    }
1552
1553    GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true),
1554                               DL, TLI);
1555    return true;
1556  }
1557
1558  return false;
1559}
1560
1561/// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
1562/// that only one value (besides its initializer) is ever stored to the global.
1563static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
1564                                     AtomicOrdering Ordering,
1565                                     Module::global_iterator &GVI,
1566                                     const DataLayout *DL,
1567                                     TargetLibraryInfo *TLI) {
1568  // Ignore no-op GEPs and bitcasts.
1569  StoredOnceVal = StoredOnceVal->stripPointerCasts();
1570
1571  // If we are dealing with a pointer global that is initialized to null and
1572  // only has one (non-null) value stored into it, then we can optimize any
1573  // users of the loaded value (often calls and loads) that would trap if the
1574  // value was null.
1575  if (GV->getInitializer()->getType()->isPointerTy() &&
1576      GV->getInitializer()->isNullValue()) {
1577    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
1578      if (GV->getInitializer()->getType() != SOVC->getType())
1579        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
1580
1581      // Optimize away any trapping uses of the loaded value.
1582      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI))
1583        return true;
1584    } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
1585      Type *MallocType = getMallocAllocatedType(CI, TLI);
1586      if (MallocType &&
1587          TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
1588                                             DL, TLI))
1589        return true;
1590    }
1591  }
1592
1593  return false;
1594}
1595
1596/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
1597/// two values ever stored into GV are its initializer and OtherVal.  See if we
1598/// can shrink the global into a boolean and select between the two values
1599/// whenever it is used.  This exposes the values to other scalar optimizations.
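///
/// For example (illustrative), with "@G = internal global i32 0" and 42 as the
/// only other stored value, a new boolean global @G.b replaces @G:
///   store i32 42, i32* @G   -->   store i1 true, i1* @G.b
///   %v = load i32* @G       -->   %b = load i1* @G.b
///                                 %v = select i1 %b, i32 42, i32 0
/// When the two values are 0 and 1, the select becomes a zext instead.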
1600static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
1601  Type *GVElType = GV->getType()->getElementType();
1602
1603  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
1604  // an FP value, pointer or vector, don't do this optimization because a select
1605  // between them is very expensive and unlikely to lead to later
1606  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
1607  // where v1 and v2 both require constant pool loads, a big loss.
1608  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
1609      GVElType->isFloatingPointTy() ||
1610      GVElType->isPointerTy() || GVElType->isVectorTy())
1611    return false;
1612
1613  // Walk the use list of the global, checking that every use is a load or
1614  // a store.  If there is anything else, bail out.
1615  for (User *U : GV->users())
1616    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
1617      return false;
1618
1619  DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV);
1620
1621  // Create the new global, initializing it to false.
1622  GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
1623                                             false,
1624                                             GlobalValue::InternalLinkage,
1625                                        ConstantInt::getFalse(GV->getContext()),
1626                                             GV->getName()+".b",
1627                                             GV->getThreadLocalMode(),
1628                                             GV->getType()->getAddressSpace());
1629  GV->getParent()->getGlobalList().insert(GV, NewGV);
1630
1631  Constant *InitVal = GV->getInitializer();
1632  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
1633         "No reason to shrink to bool!");
1634
1635  // If initialized to zero and storing one into the global, we can use a cast
1636  // instead of a select to synthesize the desired value.
1637  bool IsOneZero = false;
1638  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
1639    IsOneZero = InitVal->isNullValue() && CI->isOne();
1640
1641  while (!GV->use_empty()) {
1642    Instruction *UI = cast<Instruction>(GV->user_back());
1643    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
1644      // Change the store into a boolean store.
1645      bool StoringOther = SI->getOperand(0) == OtherVal;
1646      // Only do this if we weren't storing a loaded value.
1647      Value *StoreVal;
1648      if (StoringOther || SI->getOperand(0) == InitVal) {
1649        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
1650                                    StoringOther);
1651      } else {
1652        // Otherwise, we are storing a previously loaded copy.  To do this,
1653        // change the copy from copying the original value to just copying the
1654        // bool.
1655        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
1656
1657        // If we've already replaced the input, StoredVal will be a cast or
1658        // select instruction.  If not, it will be a load of the original
1659        // global.
1660        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
1661          assert(LI->getOperand(0) == GV && "Not a copy!");
1662          // Insert a new load, to preserve the saved value.
1663          StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
1664                                  LI->getOrdering(), LI->getSynchScope(), LI);
1665        } else {
1666          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
1667                 "This is not a form that we understand!");
1668          StoreVal = StoredVal->getOperand(0);
1669          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
1670        }
1671      }
1672      new StoreInst(StoreVal, NewGV, false, 0,
1673                    SI->getOrdering(), SI->getSynchScope(), SI);
1674    } else {
1675      // Change the load into a load of the bool, then a select or zext.
1676      LoadInst *LI = cast<LoadInst>(UI);
1677      LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
1678                                   LI->getOrdering(), LI->getSynchScope(), LI);
1679      Value *NSI;
1680      if (IsOneZero)
1681        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
1682      else
1683        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
1684      NSI->takeName(LI);
1685      LI->replaceAllUsesWith(NSI);
1686    }
1687    UI->eraseFromParent();
1688  }
1689
1690  // Retain the name of the old global variable. People who are debugging their
1691  // programs may expect these variables to be named the same.
1692  NewGV->takeName(GV);
1693  GV->eraseFromParent();
1694  return true;
1695}
1696
1697
1698/// ProcessGlobal - Analyze the specified global variable and optimize it if
1699/// possible.  If we make a change, return true.
1700bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
1701                              Module::global_iterator &GVI) {
1702  if (!GV->isDiscardableIfUnused())
1703    return false;
1704
1705  // Do more involved optimizations if the global is internal.
1706  GV->removeDeadConstantUsers();
1707
1708  if (GV->use_empty()) {
1709    DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
1710    GV->eraseFromParent();
1711    ++NumDeleted;
1712    return true;
1713  }
1714
1715  if (!GV->hasLocalLinkage())
1716    return false;
1717
1718  GlobalStatus GS;
1719
1720  if (GlobalStatus::analyzeGlobal(GV, GS))
1721    return false;
1722
1723  if (!GS.IsCompared && !GV->hasUnnamedAddr()) {
1724    GV->setUnnamedAddr(true);
1725    ++NumUnnamed;
1726  }
1727
1728  if (GV->isConstant() || !GV->hasInitializer())
1729    return false;
1730
1731  return ProcessInternalGlobal(GV, GVI, GS);
1732}
1733
1734/// ProcessInternalGlobal - Analyze the specified global variable and optimize
1735/// it if possible.  If we make a change, return true.
1736bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
1737                                      Module::global_iterator &GVI,
1738                                      const GlobalStatus &GS) {
1739  // If this is a first class global and has only one accessing function
1740  // and this function is main (which we know is not recursive), we replace
1741  // the global with a local alloca in this function.
1742  //
1743  // NOTE: It doesn't make sense to promote non-single-value types since we
1744  // are just replacing static memory with stack memory.
1745  //
1746  // Don't localize the global if it is in a different address space.
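  //
  // For example (illustrative), "@G = internal global i32 0" accessed only
  // from main() becomes "%G = alloca i32" in main's entry block, with a store
  // of the initializer emitted after it when the initializer is not undef.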
1747  if (!GS.HasMultipleAccessingFunctions &&
1748      GS.AccessingFunction && !GS.HasNonInstructionUser &&
1749      GV->getType()->getElementType()->isSingleValueType() &&
1750      GS.AccessingFunction->getName() == "main" &&
1751      GS.AccessingFunction->hasExternalLinkage() &&
1752      GV->getType()->getAddressSpace() == 0) {
1753    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
1754    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
1755                                                   ->getEntryBlock().begin());
1756    Type *ElemTy = GV->getType()->getElementType();
1757    // FIXME: Pass Global's alignment when globals have alignment
1758    AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
1759                                        GV->getName(), &FirstI);
1760    if (!isa<UndefValue>(GV->getInitializer()))
1761      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
1762
1763    GV->replaceAllUsesWith(Alloca);
1764    GV->eraseFromParent();
1765    ++NumLocalized;
1766    return true;
1767  }
1768
1769  // If the global is never loaded (but may be stored to), it is dead.
1770  // Delete it now.
1771  if (!GS.IsLoaded) {
1772    DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
1773
1774    bool Changed;
1775    if (isLeakCheckerRoot(GV)) {
1776      // Delete any constant stores to the global.
1777      Changed = CleanupPointerRootUsers(GV, TLI);
1778    } else {
1779      // Delete any stores we can find to the global.  We may not be able to
1780      // make it completely dead though.
1781      Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
1782    }
1783
1784    // If the global is dead now, delete it.
1785    if (GV->use_empty()) {
1786      GV->eraseFromParent();
1787      ++NumDeleted;
1788      Changed = true;
1789    }
1790    return Changed;
1791
1792  } else if (GS.StoredType <= GlobalStatus::InitializerStored) {
1793    DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
1794    GV->setConstant(true);
1795
1796    // Clean up any obviously simplifiable users now.
1797    CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
1798
1799    // If the global is dead now, just nuke it.
1800    if (GV->use_empty()) {
1801      DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
1802            << "all users and delete global!\n");
1803      GV->eraseFromParent();
1804      ++NumDeleted;
1805    }
1806
1807    ++NumMarked;
1808    return true;
1809  } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
1810    if (DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>()) {
1811      const DataLayout &DL = DLP->getDataLayout();
1812      if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
1813        GVI = FirstNewGV;  // Don't skip the newly produced globals!
1814        return true;
1815      }
1816    }
1817  } else if (GS.StoredType == GlobalStatus::StoredOnce) {
1818    // If the initial value for the global was an undef value, and if only
1819    // one other value was stored into it, we can just change the
1820    // initializer to be the stored value, then delete all stores to the
1821    // global.  This allows us to mark it constant.
1822    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
1823      if (isa<UndefValue>(GV->getInitializer())) {
1824        // Change the initial value here.
1825        GV->setInitializer(SOVConstant);
1826
1827        // Clean up any obviously simplifiable users now.
1828        CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
1829
1830        if (GV->use_empty()) {
1831          DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
1832                       << "simplify all users and delete global!\n");
1833          GV->eraseFromParent();
1834          ++NumDeleted;
1835        } else {
1836          GVI = GV;
1837        }
1838        ++NumSubstitute;
1839        return true;
1840      }
1841
1842    // Try to optimize globals based on the knowledge that only one value
1843    // (besides its initializer) is ever stored to the global.
1844    if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
1845                                 DL, TLI))
1846      return true;
1847
1848    // Otherwise, if the global was not a boolean, we can shrink it to be a
1849    // boolean.
1850    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
1851      if (GS.Ordering == NotAtomic) {
1852        if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
1853          ++NumShrunkToBool;
1854          return true;
1855        }
1856      }
1857    }
1858  }
1859
1860  return false;
1861}
1862
1863/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
1864/// function, changing them to FastCC.
1865static void ChangeCalleesToFastCall(Function *F) {
1866  for (User *U : F->users()) {
1867    if (isa<BlockAddress>(U))
1868      continue;
1869    CallSite CS(cast<Instruction>(U));
1870    CS.setCallingConv(CallingConv::Fast);
1871  }
1872}
1873
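/// StripNest - Return a copy of Attrs with any 'nest' parameter attribute
/// removed.  At most one parameter can carry 'nest', so we can stop at the
/// first match.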
1874static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
1875  for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
1876    unsigned Index = Attrs.getSlotIndex(i);
1877    if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
1878      continue;
1879
1880    // There can be only one.
1881    return Attrs.removeAttribute(C, Index, Attribute::Nest);
1882  }
1883
1884  return Attrs;
1885}
1886
1887static void RemoveNestAttribute(Function *F) {
1888  F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
1889  for (User *U : F->users()) {
1890    if (isa<BlockAddress>(U))
1891      continue;
1892    CallSite CS(cast<Instruction>(U));
1893    CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
1894  }
1895}
1896
1897/// Return true if this is a calling convention that we'd like to change.  The
1898/// idea here is that we don't want to mess with the convention if the user
1899/// explicitly requested something with performance implications like coldcc,
1900/// GHC, or anyregcc.
1901static bool isProfitableToMakeFastCC(Function *F) {
1902  CallingConv::ID CC = F->getCallingConv();
1903  // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
1904  return CC == CallingConv::C || CC == CallingConv::X86_ThisCall;
1905}
1906
1907bool GlobalOpt::OptimizeFunctions(Module &M) {
1908  bool Changed = false;
1909  // Optimize functions.
1910  for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
1911    Function *F = FI++;
1912    // Functions without names cannot be referenced outside this module.
1913    if (!F->hasName() && !F->isDeclaration())
1914      F->setLinkage(GlobalValue::InternalLinkage);
1915    F->removeDeadConstantUsers();
1916    if (F->isDefTriviallyDead()) {
1917      F->eraseFromParent();
1918      Changed = true;
1919      ++NumFnDeleted;
1920    } else if (F->hasLocalLinkage()) {
1921      if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
1922          !F->hasAddressTaken()) {
1923        // If this function has a calling convention worth changing, is not a
1924        // varargs function, and is only called directly, promote it to use the
1925        // Fast calling convention.
1926        F->setCallingConv(CallingConv::Fast);
1927        ChangeCalleesToFastCall(F);
1928        ++NumFastCallFns;
1929        Changed = true;
1930      }
1931
1932      if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
1933          !F->hasAddressTaken()) {
1934        // The function is not used by a trampoline intrinsic, so it is safe
1935        // to remove the 'nest' attribute.
1936        RemoveNestAttribute(F);
1937        ++NumNestRemoved;
1938        Changed = true;
1939      }
1940    }
1941  }
1942  return Changed;
1943}
1944
1945bool GlobalOpt::OptimizeGlobalVars(Module &M) {
1946  bool Changed = false;
1947  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
1948       GVI != E; ) {
1949    GlobalVariable *GV = GVI++;
1950    // Global variables without names cannot be referenced outside this module.
1951    if (!GV->hasName() && !GV->isDeclaration())
1952      GV->setLinkage(GlobalValue::InternalLinkage);
1953    // Simplify the initializer.
1954    if (GV->hasInitializer())
1955      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
1956        Constant *New = ConstantFoldConstantExpression(CE, DL, TLI);
1957        if (New && New != CE)
1958          GV->setInitializer(New);
1959      }
1960
1961    Changed |= ProcessGlobal(GV, GVI);
1962  }
1963  return Changed;
1964}
1965
1966static inline bool
1967isSimpleEnoughValueToCommit(Constant *C,
1968                            SmallPtrSet<Constant*, 8> &SimpleConstants,
1969                            const DataLayout *DL);
1970
1971
1972/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
1973/// handled by the code generator.  We don't want to generate something like:
1974///   void *X = &X/42;
1975/// because the code generator doesn't have a relocation that can handle that.
1976///
1977/// This function should be called if C was not found (but just got inserted)
1978/// in SimpleConstants to avoid having to rescan the same constants all the
1979/// time.
1980static bool isSimpleEnoughValueToCommitHelper(Constant *C,
1981                                   SmallPtrSet<Constant*, 8> &SimpleConstants,
1982                                   const DataLayout *DL) {
1983  // Simple integer, undef, constant aggregate zero, global addresses, etc are
1984  // all supported.
1985  if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
1986      isa<GlobalValue>(C))
1987    return true;
1988
1989  // Aggregate values are safe if all their elements are.
1990  if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
1991      isa<ConstantVector>(C)) {
1992    for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
1993      Constant *Op = cast<Constant>(C->getOperand(i));
1994      if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, DL))
1995        return false;
1996    }
1997    return true;
1998  }
1999
2000  // We don't know exactly what relocations are allowed in constant expressions,
2001  // so we allow &global+constantoffset, which is safe and uniformly supported
2002  // across targets.
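  // For example (illustrative), "getelementptr (@G, i32 0, i32 3)" and
  // "add (ptrtoint @G), 16" are acceptable below, while something like
  // "udiv (ptrtoint @G), 42" has no relocation and falls through to fail.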
2003  ConstantExpr *CE = cast<ConstantExpr>(C);
2004  switch (CE->getOpcode()) {
2005  case Instruction::BitCast:
2006    // Bitcast is fine if the casted value is fine.
2007    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
2008
2009  case Instruction::IntToPtr:
2010  case Instruction::PtrToInt:
2011    // int <=> ptr is fine if the int type is the same size as the
2012    // pointer type.
2013    if (!DL || DL->getTypeSizeInBits(CE->getType()) !=
2014               DL->getTypeSizeInBits(CE->getOperand(0)->getType()))
2015      return false;
2016    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
2017
2018  // GEP is fine if it is simple + constant offset.
2019  case Instruction::GetElementPtr:
2020    for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
2021      if (!isa<ConstantInt>(CE->getOperand(i)))
2022        return false;
2023    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
2024
2025  case Instruction::Add:
2026    // We allow simple+cst.
2027    if (!isa<ConstantInt>(CE->getOperand(1)))
2028      return false;
2029    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
2030  }
2031  return false;
2032}
2033
2034static inline bool
2035isSimpleEnoughValueToCommit(Constant *C,
2036                            SmallPtrSet<Constant*, 8> &SimpleConstants,
2037                            const DataLayout *DL) {
2038  // If we already checked this constant, we win.
2039  if (!SimpleConstants.insert(C)) return true;
2040  // Check the constant.
2041  return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
2042}
2043
2044
2045/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
2046/// enough for us to understand.  In particular, we punt on any cast other
2047/// than one from one pointer type to another pointer type.
2048/// We basically just support direct accesses to globals and GEP's of
2049/// globals.  This should be kept up to date with CommitValueTo.
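///
/// For example (illustrative), @G itself or
///   getelementptr inbounds ([4 x i32]* @G, i32 0, i32 2)
/// is simple enough, while a gep whose first index is nonzero, or one that
/// over-indexes a static array bound, is rejected.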
2050static bool isSimpleEnoughPointerToCommit(Constant *C) {
2051  // Conservatively, avoid aggregate types. This is because we don't
2052  // want to worry about them partially overlapping other stores.
2053  if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
2054    return false;
2055
2056  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
2057    // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
2058    // external globals.
2059    return GV->hasUniqueInitializer();
2060
2061  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
2062    // Handle a constantexpr gep.
2063    if (CE->getOpcode() == Instruction::GetElementPtr &&
2064        isa<GlobalVariable>(CE->getOperand(0)) &&
2065        cast<GEPOperator>(CE)->isInBounds()) {
2066      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2067      // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
2068      // external globals.
2069      if (!GV->hasUniqueInitializer())
2070        return false;
2071
2072      // The first index must be zero.
2073      ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin()));
2074      if (!CI || !CI->isZero()) return false;
2075
2076      // The remaining indices must be compile-time known integers within the
2077      // notional bounds of the corresponding static array types.
2078      if (!CE->isGEPWithNoNotionalOverIndexing())
2079        return false;
2080
2081      return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
2082
2083    // A constantexpr bitcast from a pointer to another pointer is a no-op,
2084    // and we know how to evaluate it by moving the bitcast from the pointer
2085    // operand to the value operand.
2086    } else if (CE->getOpcode() == Instruction::BitCast &&
2087               isa<GlobalVariable>(CE->getOperand(0))) {
2088      // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
2089      // external globals.
2090      return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
2091    }
2092  }
2093
2094  return false;
2095}
2096
2097/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
2098/// initializer.  This returns 'Init' modified to reflect 'Val' stored into it.
2099/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
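///
/// For example (illustrative), storing i32 7 through
///   getelementptr ({ i32, [2 x i32] }* @G, i32 0, i32 1, i32 1)
/// into the initializer { i32 1, [2 x i32] [i32 2, i32 3] } recurses through
/// the struct and then the array and yields
/// { i32 1, [2 x i32] [i32 2, i32 7] }.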
2100static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
2101                                   ConstantExpr *Addr, unsigned OpNo) {
2102  // Base case of the recursion.
2103  if (OpNo == Addr->getNumOperands()) {
2104    assert(Val->getType() == Init->getType() && "Type mismatch!");
2105    return Val;
2106  }
2107
2108  SmallVector<Constant*, 32> Elts;
2109  if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
2110    // Break up the constant into its elements.
2111    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
2112      Elts.push_back(Init->getAggregateElement(i));
2113
2114    // Replace the element that we are supposed to.
2115    ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
2116    unsigned Idx = CU->getZExtValue();
2117    assert(Idx < STy->getNumElements() && "Struct index out of range!");
2118    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
2119
2120    // Return the modified struct.
2121    return ConstantStruct::get(STy, Elts);
2122  }
2123
2124  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
2125  SequentialType *InitTy = cast<SequentialType>(Init->getType());
2126
2127  uint64_t NumElts;
2128  if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
2129    NumElts = ATy->getNumElements();
2130  else
2131    NumElts = InitTy->getVectorNumElements();
2132
2133  // Break up the array into elements.
2134  for (uint64_t i = 0, e = NumElts; i != e; ++i)
2135    Elts.push_back(Init->getAggregateElement(i));
2136
2137  assert(CI->getZExtValue() < NumElts);
2138  Elts[CI->getZExtValue()] =
2139    EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
2140
2141  if (Init->getType()->isArrayTy())
2142    return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
2143  return ConstantVector::get(Elts);
2144}
2145
2146/// CommitValueTo - We have decided that Addr (which satisfies the predicate
2147/// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
2148static void CommitValueTo(Constant *Val, Constant *Addr) {
2149  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
2150    assert(GV->hasInitializer());
2151    GV->setInitializer(Val);
2152    return;
2153  }
2154
2155  ConstantExpr *CE = cast<ConstantExpr>(Addr);
2156  GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
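  // Operand 0 of the gep is the global itself and operand 1 is the leading
  // zero index (guaranteed by isSimpleEnoughPointerToCommit), so stepping
  // through the remaining indices starts at operand 2.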
2157  GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
2158}
2159
2160namespace {
2161
2162/// Evaluator - This class evaluates LLVM IR, producing the Constant
2163/// representing each SSA instruction.  Changes to global variables are stored
2164/// in a mapping that can be iterated over after the evaluation is complete.
2165/// Once an evaluation call fails, the evaluation object should not be reused.
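///
/// A typical use, sketched (see EvaluateStaticConstructor below for the real
/// call site):
///   Evaluator Eval(DL, TLI);
///   Constant *RetVal = nullptr;
///   SmallVector<Constant*, 0> Args;
///   if (Eval.EvaluateFunction(F, RetVal, Args))
///     for (const auto &I : Eval.getMutatedMemory())
///       CommitValueTo(I.second, I.first);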
2166class Evaluator {
2167public:
2168  Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI)
2169    : DL(DL), TLI(TLI) {
2170    ValueStack.emplace_back();
2171  }
2172
2173  ~Evaluator() {
2174    for (auto &Tmp : AllocaTmps)
2175      // If there are still users of the alloca, the program is doing something
2176      // silly, e.g. storing the address of the alloca somewhere and using it
2177      // later.  Since this is undefined, we'll just make it be null.
2178      if (!Tmp->use_empty())
2179        Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
2180  }
2181
2182  /// EvaluateFunction - Evaluate a call to function F, returning true if
2183  /// successful, false if we can't evaluate it.  ActualArgs contains the
2184  /// constant values bound to the function's formal arguments.
2185  bool EvaluateFunction(Function *F, Constant *&RetVal,
2186                        const SmallVectorImpl<Constant*> &ActualArgs);
2187
2188  /// EvaluateBlock - Evaluate all instructions in the block starting at
2189  /// CurInst, returning true if successful, false if we can't evaluate it.
2190  /// NextBB returns the next BB that control flows into, or null upon return.
2191  bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
2192
2193  Constant *getVal(Value *V) {
2194    if (Constant *CV = dyn_cast<Constant>(V)) return CV;
2195    Constant *R = ValueStack.back().lookup(V);
2196    assert(R && "Reference to an uncomputed value!");
2197    return R;
2198  }
2199
2200  void setVal(Value *V, Constant *C) {
2201    ValueStack.back()[V] = C;
2202  }
2203
2204  const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
2205    return MutatedMemory;
2206  }
2207
2208  const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
2209    return Invariants;
2210  }
2211
2212private:
2213  Constant *ComputeLoadResult(Constant *P);
2214
2215  /// ValueStack - As we compute SSA register values, we store their contents
2216  /// here. The back of the deque contains the current function and the stack
2217  /// contains the values in the calling frames.
2218  std::deque<DenseMap<Value*, Constant*>> ValueStack;
2219
2220  /// CallStack - This is used to detect recursion.  In pathological situations
2221  /// we could hit exponential behavior, but at least there is nothing
2222  /// unbounded.
2223  SmallVector<Function*, 4> CallStack;
2224
2225  /// MutatedMemory - For each store we execute, we update this map.  Loads
2226  /// check this to get the most up-to-date value.  If evaluation is successful,
2227  /// this state is committed to the module's globals.
2228  DenseMap<Constant*, Constant*> MutatedMemory;
2229
2230  /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
2231  /// to represent its body.  This vector is needed so we can delete the
2232  /// temporary globals when we are done.
2233  SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps;
2234
2235  /// Invariants - These global variables have been marked invariant by the
2236  /// static constructor.
2237  SmallPtrSet<GlobalVariable*, 8> Invariants;
2238
2239  /// SimpleConstants - These are constants we have checked and know to be
2240  /// simple enough to live in a static initializer of a global.
2241  SmallPtrSet<Constant*, 8> SimpleConstants;
2242
2243  const DataLayout *DL;
2244  const TargetLibraryInfo *TLI;
2245};
2246
2247}  // anonymous namespace
2248
2249/// ComputeLoadResult - Return the value that would be computed by a load from
2250/// P after the stores reflected by MutatedMemory have been performed.  If we
2251/// can't decide, return null.
2252Constant *Evaluator::ComputeLoadResult(Constant *P) {
2253  // If this memory location has been recently stored, use the stored value: it
2254  // is the most up-to-date.
2255  DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
2256  if (I != MutatedMemory.end()) return I->second;
2257
2258  // Access it.
2259  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
2260    if (GV->hasDefinitiveInitializer())
2261      return GV->getInitializer();
2262    return nullptr;
2263  }
2264
2265  // Handle a constantexpr getelementptr.
2266  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
2267    if (CE->getOpcode() == Instruction::GetElementPtr &&
2268        isa<GlobalVariable>(CE->getOperand(0))) {
2269      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2270      if (GV->hasDefinitiveInitializer())
2271        return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
2272    }
2273
2274  return nullptr;  // Don't know how to evaluate.
2275}
2276
2277/// EvaluateBlock - Evaluate all instructions in the block starting at
2278/// CurInst, returning true if successful, false if we can't evaluate it.
2279/// NextBB returns the next BB that control flows into, or null upon return.
2280bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
2281                              BasicBlock *&NextBB) {
2282  // This is the main evaluation loop.
2283  while (1) {
2284    Constant *InstResult = nullptr;
2285
2286    DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
2287
2288    if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
2289      if (!SI->isSimple()) {
2290        DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
2291        return false;  // no volatile/atomic accesses.
2292      }
2293      Constant *Ptr = getVal(SI->getOperand(1));
2294      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
2295        DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
2296        Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
2297        DEBUG(dbgs() << "; To: " << *Ptr << "\n");
2298      }
2299      if (!isSimpleEnoughPointerToCommit(Ptr)) {
2300        // If this is too complex for us to commit, reject it.
2301        DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.\n");
2302        return false;
2303      }
2304
2305      Constant *Val = getVal(SI->getOperand(0));
2306
2307      // If this might be too difficult for the backend to handle (e.g. the addr
2308      // of one global variable divided by another) then we can't commit it.
2309      if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
2310        DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
2311              << "\n");
2312        return false;
2313      }
2314
2315      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
2316        if (CE->getOpcode() == Instruction::BitCast) {
2317          DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
2318          // If we're evaluating a store through a bitcast, then we need
2319          // to pull the bitcast off the pointer type and push it onto the
2320          // stored value.
2321          Ptr = CE->getOperand(0);
2322
2323          Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
2324
2325          // In order to push the bitcast onto the stored value, a bitcast
2326          // from NewTy to Val's type must be legal.  If it's not, we can try
2327          // introspecting NewTy to find a legal conversion.
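          //
          // For example (illustrative), storing an i32 through an
          // { i32, float }* destination becomes a store through
          // getelementptr({ i32, float }* Ptr, i32 0, i32 0), whose i32
          // element type matches the stored value.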
2328          while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
2329            // If NewTy is a struct, we can convert the pointer to the struct
2330            // into a pointer to its first member.
2331            // FIXME: This could be extended to support arrays as well.
2332            if (StructType *STy = dyn_cast<StructType>(NewTy)) {
2333              NewTy = STy->getTypeAtIndex(0U);
2334
2335              IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
2336              Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
2337              Constant * const IdxList[] = {IdxZero, IdxZero};
2338
2339              Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
2340              if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
2341                Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
2342
2343            // If we can't improve the situation by introspecting NewTy,
2344            // we have to give up.
2345            } else {
2346              DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
2347                    "evaluate.\n");
2348              return false;
2349            }
2350          }
2351
2352          // If we found compatible types, go ahead and push the bitcast
2353          // onto the stored value.
2354          Val = ConstantExpr::getBitCast(Val, NewTy);
2355
2356          DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
2357        }
2358      }
2359
2360      MutatedMemory[Ptr] = Val;
2361    } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
2362      InstResult = ConstantExpr::get(BO->getOpcode(),
2363                                     getVal(BO->getOperand(0)),
2364                                     getVal(BO->getOperand(1)));
2365      DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
2366            << "\n");
2367    } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
2368      InstResult = ConstantExpr::getCompare(CI->getPredicate(),
2369                                            getVal(CI->getOperand(0)),
2370                                            getVal(CI->getOperand(1)));
2371      DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
2372            << "\n");
2373    } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
2374      InstResult = ConstantExpr::getCast(CI->getOpcode(),
2375                                         getVal(CI->getOperand(0)),
2376                                         CI->getType());
2377      DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
2378            << "\n");
2379    } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
2380      InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
2381                                           getVal(SI->getOperand(1)),
2382                                           getVal(SI->getOperand(2)));
2383      DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
2384            << "\n");
2385    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
2386      Constant *P = getVal(GEP->getOperand(0));
2387      SmallVector<Constant*, 8> GEPOps;
2388      for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
2389           i != e; ++i)
2390        GEPOps.push_back(getVal(*i));
2391      InstResult =
2392        ConstantExpr::getGetElementPtr(P, GEPOps,
2393                                       cast<GEPOperator>(GEP)->isInBounds());
2394      DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
2395            << "\n");
2396    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
2397
2398      if (!LI->isSimple()) {
2399        DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
2400        return false;  // no volatile/atomic accesses.
2401      }
2402
2403      Constant *Ptr = getVal(LI->getOperand(0));
2404      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
2405        Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
2406        DEBUG(dbgs() << "Found a constant pointer expression, constant "
2407              "folding: " << *Ptr << "\n");
2408      }
2409      InstResult = ComputeLoadResult(Ptr);
2410      if (!InstResult) {
2411        DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
2412              "\n");
2413        return false; // Could not evaluate load.
2414      }
2415
2416      DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
2417    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
2418      if (AI->isArrayAllocation()) {
2419        DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
2420        return false;  // Cannot handle array allocs.
2421      }
2422      Type *Ty = AI->getType()->getElementType();
2423      AllocaTmps.push_back(
2424          make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
2425                                      UndefValue::get(Ty), AI->getName()));
2426      InstResult = AllocaTmps.back().get();
2427      DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
2428    } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
2429      CallSite CS(CurInst);
2430
2431      // Debug info can safely be ignored here.
2432      if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
2433        DEBUG(dbgs() << "Ignoring debug info.\n");
2434        ++CurInst;
2435        continue;
2436      }
2437
2438      // Cannot handle inline asm.
2439      if (isa<InlineAsm>(CS.getCalledValue())) {
2440        DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
2441        return false;
2442      }
2443
2444      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
2445        if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
2446          if (MSI->isVolatile()) {
2447            DEBUG(dbgs() << "Can not optimize a volatile memset " <<
2448                  "intrinsic.\n");
2449            return false;
2450          }
2451          Constant *Ptr = getVal(MSI->getDest());
2452          Constant *Val = getVal(MSI->getValue());
2453          Constant *DestVal = ComputeLoadResult(getVal(Ptr));
2454          if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
2455            // This memset is a no-op.
2456            DEBUG(dbgs() << "Ignoring no-op memset.\n");
2457            ++CurInst;
2458            continue;
2459          }
2460        }
2461
2462        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
2463            II->getIntrinsicID() == Intrinsic::lifetime_end) {
2464          DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
2465          ++CurInst;
2466          continue;
2467        }
2468
2469        if (II->getIntrinsicID() == Intrinsic::invariant_start) {
2470          // We don't insert an entry into Values, as it doesn't have a
2471          // meaningful return value.
2472          if (!II->use_empty()) {
2473            DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n");
2474            return false;
2475          }
2476          ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
2477          Value *PtrArg = getVal(II->getArgOperand(1));
2478          Value *Ptr = PtrArg->stripPointerCasts();
2479          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
2480            Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
2481            if (DL && !Size->isAllOnesValue() &&
2482                Size->getValue().getLimitedValue() >=
2483                DL->getTypeStoreSize(ElemTy)) {
2484              Invariants.insert(GV);
2485              DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
2486                    << "\n");
2487            } else {
2488              DEBUG(dbgs() << "Found a global var, but can not treat it as an "
2489                    "invariant.\n");
2490            }
2491          }
2492          // Continue even if we do nothing.
2493          ++CurInst;
2494          continue;
2495        }
2496
2497        DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
2498        return false;
2499      }
2500
2501      // Resolve function pointers.
2502      Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
2503      if (!Callee || Callee->mayBeOverridden()) {
2504        DEBUG(dbgs() << "Can not resolve function pointer.\n");
2505        return false;  // Cannot resolve.
2506      }
2507
2508      SmallVector<Constant*, 8> Formals;
2509      for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
2510        Formals.push_back(getVal(*i));
2511
2512      if (Callee->isDeclaration()) {
2513        // If this is a function we can constant fold, do it.
2514        if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
2515          InstResult = C;
2516          DEBUG(dbgs() << "Constant folded function call. Result: " <<
2517                *InstResult << "\n");
2518        } else {
2519          DEBUG(dbgs() << "Can not constant fold function call.\n");
2520          return false;
2521        }
2522      } else {
2523        if (Callee->getFunctionType()->isVarArg()) {
2524          DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
2525          return false;
2526        }
2527
2528        Constant *RetVal = nullptr;
2529        // Execute the call, if successful, use the return value.
2530        ValueStack.emplace_back();
2531        if (!EvaluateFunction(Callee, RetVal, Formals)) {
2532          DEBUG(dbgs() << "Failed to evaluate function.\n");
2533          return false;
2534        }
2535        ValueStack.pop_back();
2536        InstResult = RetVal;
2537
2538        if (InstResult) {
2539          DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
2540                *InstResult << "\n\n");
2541        } else {
2542          DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
2543        }
2544      }
2545    } else if (isa<TerminatorInst>(CurInst)) {
2546      DEBUG(dbgs() << "Found a terminator instruction.\n");
2547
2548      if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
2549        if (BI->isUnconditional()) {
2550          NextBB = BI->getSuccessor(0);
2551        } else {
2552          ConstantInt *Cond =
2553            dyn_cast<ConstantInt>(getVal(BI->getCondition()));
2554          if (!Cond) return false;  // Cannot determine.
2555
2556          NextBB = BI->getSuccessor(!Cond->getZExtValue());
2557        }
2558      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
2559        ConstantInt *Val =
2560          dyn_cast<ConstantInt>(getVal(SI->getCondition()));
2561        if (!Val) return false;  // Cannot determine.
2562        NextBB = SI->findCaseValue(Val).getCaseSuccessor();
2563      } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
2564        Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
2565        if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
2566          NextBB = BA->getBasicBlock();
2567        else
2568          return false;  // Cannot determine.
2569      } else if (isa<ReturnInst>(CurInst)) {
2570        NextBB = nullptr;
2571      } else {
2572        // Some other terminator (e.g. resume or unreachable) we can't handle.
2573        DEBUG(dbgs() << "Can not handle terminator.\n");
2574        return false;  // Cannot handle this terminator.
2575      }
2576
2577      // We succeeded at evaluating this block!
2578      DEBUG(dbgs() << "Successfully evaluated block.\n");
2579      return true;
2580    } else {
2581      // Did not know how to evaluate this!
2582      DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
2583            "\n");
2584      return false;
2585    }
2586
2587    if (!CurInst->use_empty()) {
2588      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
2589        InstResult = ConstantFoldConstantExpression(CE, DL, TLI);
2590
2591      setVal(CurInst, InstResult);
2592    }
2593
2594    // If we just processed an invoke, we finished evaluating the block.
2595    if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
2596      NextBB = II->getNormalDest();
2597      DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
2598      return true;
2599    }
2600
2601    // Advance program counter.
2602    ++CurInst;
2603  }
2604}
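
// For illustration, consider a branch whose condition folds to a constant
// once a simulated store to a global is known (hypothetical IR):
//
//   %v = load i32* @flag
//   %c = icmp eq i32 %v, 0
//   br i1 %c, label %then, label %else
//
// getVal resolves %v to the simulated contents of @flag, the icmp folds to a
// ConstantInt, and the conditional-branch case above then picks exactly one
// successor, so evaluation proceeds down a single path.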

/// EvaluateFunction - Evaluate a call to function F, returning true if
/// successful, false if we can't evaluate it.  ActualArgs contains the
/// constant argument values for the call; they are bound to F's formal
/// parameters below.
bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
                                 const SmallVectorImpl<Constant*> &ActualArgs) {
  // Check to see if this function is already executing (recursion).  If so,
  // bail out.  TODO: we might want to accept limited recursion.
  if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
    return false;

  CallStack.push_back(F);

  // Initialize arguments to the incoming values specified.
  unsigned ArgNo = 0;
  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
       ++AI, ++ArgNo)
    setVal(AI, ActualArgs[ArgNo]);

  // ExecutedBlocks - We only handle non-looping, non-recursive code.  As such,
  // we can only evaluate any one basic block at most once.  This set keeps
  // track of what we have executed so we can detect loops.
  SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;

  // CurBB - The current basic block we're evaluating.
  BasicBlock *CurBB = F->begin();

  BasicBlock::iterator CurInst = CurBB->begin();

  while (1) {
    BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
    DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");

    if (!EvaluateBlock(CurInst, NextBB))
      return false;

    if (!NextBB) {
      // Successfully running until there's no next block means that we found
      // the return.  Fill in the return value and pop the call stack.
      ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
      if (RI->getNumOperands())
        RetVal = getVal(RI->getOperand(0));
      CallStack.pop_back();
      return true;
    }

    // Okay, we succeeded in evaluating this control flow.  See if we have
    // executed the new block before.  If so, we have a looping function,
    // which we cannot evaluate in reasonable time.
    if (!ExecutedBlocks.insert(NextBB))
      return false;  // looped!

    // Okay, we have never been in this block before.  Check to see if there
    // are any PHI nodes.  If so, evaluate them with information about where
    // we came from.
    PHINode *PN = nullptr;
    for (CurInst = NextBB->begin();
         (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
      setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));

    // Advance to the next block.
    CurBB = NextBB;
  }
}
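
// As a sketch of the contract above, a hypothetical callee such as
//
//   define internal i32 @three() {
//   entry:
//     ret i32 3
//   }
//
// evaluates successfully with RetVal set to i32 3.  Any function that
// revisits a block (a loop) or re-enters a function already on CallStack
// (recursion) instead causes a bail-out, with nothing committed.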

/// EvaluateStaticConstructor - Evaluate the given static constructor function,
/// if we can.  Returns true if evaluation succeeded and the results were
/// committed; false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
                                      const TargetLibraryInfo *TLI) {
  // Call the function.
  Evaluator Eval(DL, TLI);
  Constant *RetValDummy;
  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
                                           SmallVector<Constant*, 0>());

  if (EvalSuccess) {
    ++NumCtorsEvaluated;

    // We succeeded at evaluation: commit the result.
    DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
          << F->getName() << "' to " << Eval.getMutatedMemory().size()
          << " stores.\n");
    for (DenseMap<Constant*, Constant*>::const_iterator I =
           Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
         I != E; ++I)
      CommitValueTo(I->second, I->first);
    for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
           Eval.getInvariants().begin(), E = Eval.getInvariants().end();
         I != E; ++I)
      (*I)->setConstant(true);
  }

  return EvalSuccess;
}
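
// For example, a hypothetical constructor of the form
//
//   @g = global i32 0
//   define internal void @ctor() {
//     store i32 42, i32* @g
//     ret void
//   }
//
// evaluates to one mutated-memory entry, and CommitValueTo folds the store
// into the initializer, leaving:
//
//   @g = global i32 42
//
// The caller (optimizeGlobalCtorsList) can then drop @ctor from
// llvm.global_ctors entirely.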

static int compareNames(Constant *const *A, Constant *const *B) {
  return (*A)->getName().compare((*B)->getName());
}

static void setUsedInitializer(GlobalVariable &V,
                               SmallPtrSet<GlobalValue *, 8> Init) {
  if (Init.empty()) {
    V.eraseFromParent();
    return;
  }

  // Type of the pointers stored in the array: i8* in address space 0.
  PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);

  SmallVector<llvm::Constant *, 8> UsedArray;
  for (SmallPtrSet<GlobalValue *, 8>::iterator I = Init.begin(), E = Init.end();
       I != E; ++I) {
    Constant *Cast
      = ConstantExpr::getPointerBitCastOrAddrSpaceCast(*I, Int8PtrTy);
    UsedArray.push_back(Cast);
  }
  // Sort to get deterministic order.
  array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
  ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());

  Module *M = V.getParent();
  V.removeFromParent();
  GlobalVariable *NV =
      new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage,
                         llvm::ConstantArray::get(ATy, UsedArray), "");
  NV->takeName(&V);
  NV->setSection("llvm.metadata");
  delete &V;
}
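
// The rewritten variable follows the usual llvm.used conventions; e.g., with
// two hypothetical entries @f and @g it would look like:
//
//   @llvm.used = appending global [2 x i8*]
//       [i8* bitcast (void ()* @f to i8*), i8* bitcast (i32* @g to i8*)],
//       section "llvm.metadata"
//
// Because the array is sorted by name above, repeated runs of the pass
// produce identical output.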

namespace {
/// \brief An easy-to-access representation of llvm.used and
/// llvm.compiler.used.
class LLVMUsed {
  SmallPtrSet<GlobalValue *, 8> Used;
  SmallPtrSet<GlobalValue *, 8> CompilerUsed;
  GlobalVariable *UsedV;
  GlobalVariable *CompilerUsedV;

public:
  LLVMUsed(Module &M) {
    UsedV = collectUsedGlobalVariables(M, Used, false);
    CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
  }
  typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator;
  iterator usedBegin() { return Used.begin(); }
  iterator usedEnd() { return Used.end(); }
  iterator compilerUsedBegin() { return CompilerUsed.begin(); }
  iterator compilerUsedEnd() { return CompilerUsed.end(); }
  bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
  bool compilerUsedCount(GlobalValue *GV) const {
    return CompilerUsed.count(GV);
  }
  bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
  bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
  bool usedInsert(GlobalValue *GV) { return Used.insert(GV); }
  bool compilerUsedInsert(GlobalValue *GV) { return CompilerUsed.insert(GV); }

  void syncVariablesAndSets() {
    if (UsedV)
      setUsedInitializer(*UsedV, Used);
    if (CompilerUsedV)
      setUsedInitializer(*CompilerUsedV, CompilerUsed);
  }
};
}

static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
  if (GA.use_empty()) // No use at all.
    return false;

  assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
         "We should have removed the duplicated "
         "element from llvm.compiler.used");
  if (!GA.hasOneUse())
    // Strictly more than one use, so at least one of them cannot be the
    // reference from llvm.used or llvm.compiler.used.
    return true;

  // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
  return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
}
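
// Example: if @a is referenced only from the @llvm.used array, its single
// use is that constant array entry, so the check above answers false; any
// additional use (say, a call through @a) flips the answer to true.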

static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
                                               const LLVMUsed &U) {
  // We need strictly more than one "real" use; if V appears in llvm.used or
  // llvm.compiler.used, that array contributes one extra use to discount.
  unsigned N = 2;
  assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
         "We should have removed the duplicated "
         "element from llvm.compiler.used");
  if (U.usedCount(&V) || U.compilerUsedCount(&V))
    ++N;
  return V.hasNUsesOrMore(N);
}
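
// Example: a target that appears in @llvm.used and has two calls to it has
// three uses in total; N is bumped from 2 to 3 to discount the llvm.used
// reference, and hasNUsesOrMore(3) still holds, so the function above
// returns true (two "real" uses).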

static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
  if (!GA.hasLocalLinkage())
    return true;

  return U.usedCount(&GA) || U.compilerUsedCount(&GA);
}

static bool hasUsesToReplace(GlobalAlias &GA, LLVMUsed &U, bool &RenameTarget) {
  RenameTarget = false;
  bool Ret = false;
  if (hasUseOtherThanLLVMUsed(GA, U))
    Ret = true;

  // If the alias is externally visible, we may still be able to simplify it.
  if (!mayHaveOtherReferences(GA, U))
    return Ret;

  // If the aliasee has internal linkage, give it the name and linkage
  // of the alias, and delete the alias.  This turns:
  //   define internal ... @f(...)
  //   @a = alias ... @f
  // into:
  //   define ... @a(...)
  Constant *Aliasee = GA.getAliasee();
  GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
  if (!Target->hasLocalLinkage())
    return Ret;

  // Do not perform the transform if multiple aliases potentially target the
  // aliasee. This check also ensures that it is safe to replace the section
  // and other attributes of the aliasee with those of the alias.
  if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
    return Ret;

  RenameTarget = true;
  return true;
}

bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
  bool Changed = false;
  LLVMUsed Used(M);

  for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.usedBegin(),
                                               E = Used.usedEnd();
       I != E; ++I)
    Used.compilerUsedErase(*I);

  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
       I != E;) {
    Module::alias_iterator J = I++;
    // Aliases without names cannot be referenced outside this module.
    if (!J->hasName() && !J->isDeclaration())
      J->setLinkage(GlobalValue::InternalLinkage);
    // If the aliasee may change at link time, nothing can be done; bail out.
    if (J->mayBeOverridden())
      continue;

    Constant *Aliasee = J->getAliasee();
    GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
    Target->removeDeadConstantUsers();

    // Make all users of the alias use the aliasee instead.
    bool RenameTarget;
    if (!hasUsesToReplace(*J, Used, RenameTarget))
      continue;

    J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
    ++NumAliasesResolved;
    Changed = true;

    if (RenameTarget) {
      // Give the aliasee the name, linkage and other attributes of the alias.
      Target->takeName(J);
      Target->setLinkage(J->getLinkage());
      Target->setVisibility(J->getVisibility());
      Target->setDLLStorageClass(J->getDLLStorageClass());

      if (Used.usedErase(J))
        Used.usedInsert(Target);

      if (Used.compilerUsedErase(J))
        Used.compilerUsedInsert(Target);
    } else if (mayHaveOtherReferences(*J, Used))
      continue;

    // Delete the alias.
    M.getAliasList().erase(J);
    ++NumAliasesRemoved;
    Changed = true;
  }

  Used.syncVariablesAndSets();

  return Changed;
}

static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc::cxa_atexit))
    return nullptr;

  Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));

  if (!Fn)
    return nullptr;

  FunctionType *FTy = Fn->getFunctionType();

  // Checking that the function has the right return type, the right number of
  // parameters, and that they all have pointer types should be enough.
  if (!FTy->getReturnType()->isIntegerTy() ||
      FTy->getNumParams() != 3 ||
      !FTy->getParamType(0)->isPointerTy() ||
      !FTy->getParamType(1)->isPointerTy() ||
      !FTy->getParamType(2)->isPointerTy())
    return nullptr;

  return Fn;
}
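
// On a typical Itanium-ABI target the declaration accepted above looks like
// (hypothetical module):
//
//   declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
//
// The check is deliberately loose: any integer return type and any three
// pointer parameter types pass.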

/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
/// destructor and can therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code so we only look for a function with a single basic block, where
/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor, and
/// other side-effect-free instructions.
static bool cxxDtorIsEmpty(const Function &Fn,
                           SmallPtrSet<const Function *, 8> &CalledFunctions) {
  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
  // nounwind, but that doesn't seem worth doing.
  if (Fn.isDeclaration())
    return false;

  // Give up if the function has more than one basic block.
  if (++Fn.begin() != Fn.end())
    return false;

  const BasicBlock &EntryBlock = Fn.getEntryBlock();
  for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
       I != E; ++I) {
    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
      // Ignore debug intrinsics.
      if (isa<DbgInfoIntrinsic>(CI))
        continue;

      const Function *CalledFn = CI->getCalledFunction();

      if (!CalledFn)
        return false;

      SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);

      // Don't treat recursive functions as empty.
      if (!NewCalledFunctions.insert(CalledFn))
        return false;

      if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
        return false;
    } else if (isa<ReturnInst>(*I))
      return true; // We're done.
    else if (I->mayHaveSideEffects())
      return false; // Destructor with side effects, bail.
  }

  return false;
}
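
// A destructor that qualifies as empty might look like (hypothetical IR):
//
//   define linkonce_odr void @_ZN3FooD1Ev(i8* %this) {
//   entry:
//     ret void
//   }
//
// A single-block body whose calls all bottom out in equally empty dtors also
// qualifies, via the recursion above.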

bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
  /// Itanium C++ ABI p3.3.5:
  ///
  ///   After constructing a global (or local static) object, that will require
  ///   destruction on exit, a termination function is registered as follows:
  ///
  ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
  ///
  ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
  ///   call f(p) when DSO d is unloaded, before all such termination calls
  ///   registered before this one. It returns zero if registration is
  ///   successful, nonzero on failure.

  // Look for calls to __cxa_atexit that register a trivial (empty) destructor,
  // and remove them.
  bool Changed = false;

  for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
       I != E;) {
    // We're only interested in calls. Theoretically, we could handle invoke
    // instructions as well, but neither llvm-gcc nor clang generates invokes
    // to __cxa_atexit.
    CallInst *CI = dyn_cast<CallInst>(*I++);
    if (!CI)
      continue;

    Function *DtorFn =
      dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
    if (!DtorFn)
      continue;

    SmallPtrSet<const Function *, 8> CalledFunctions;
    if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
      continue;

    // Just remove the call.
    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
    CI->eraseFromParent();

    ++NumCXXDtorsRemoved;

    Changed = true;
  }

  return Changed;
}
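
// After this transform, a registration such as (hypothetical IR)
//
//   %0 = call i32 @__cxa_atexit(void (i8*)* @_ZN3FooD1Ev, i8* %p,
//                               i8* @__dso_handle)
//
// disappears: uses of %0 are replaced with i32 0 (the ABI's "success" return
// value) and the call itself is erased.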

bool GlobalOpt::runOnModule(Module &M) {
  bool Changed = false;

  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
  DL = DLP ? &DLP->getDataLayout() : nullptr;
  TLI = &getAnalysis<TargetLibraryInfo>();

  // Iterate to a fixed point: each transformation can expose further
  // opportunities for the others.
  bool LocalChange = true;
  while (LocalChange) {
    LocalChange = false;

    // Delete functions that are trivially dead, and convert ccc -> fastcc
    // where it is safe to do so.
    LocalChange |= OptimizeFunctions(M);

    // Optimize global_ctors list.
    LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
      return EvaluateStaticConstructor(F, DL, TLI);
    });

    // Optimize non-address-taken globals.
    LocalChange |= OptimizeGlobalVars(M);

    // Resolve aliases, when possible.
    LocalChange |= OptimizeGlobalAliases(M);

    // Try to remove trivial global destructors if they are not removed
    // already.
    Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
    if (CXAAtExitFn)
      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);

    Changed |= LocalChange;
  }

  // TODO: Move all global ctors functions to the end of the module for code
  // layout.

  return Changed;
}