//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken.  If obviously true, it marks read/write globals as constant, deletes
// variables only stored to, etc.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <deque>
using namespace llvm;

#define DEBUG_TYPE "globalopt"

STATISTIC(NumMarked    , "Number of globals marked constant");
STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted   , "Number of globals deleted");
STATISTIC(NumFnDeleted , "Number of functions deleted");
STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");

namespace {
  struct GlobalOpt : public ModulePass {
    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<TargetLibraryInfo>();
    }
    static char ID; // Pass identification, replacement for typeid
    GlobalOpt() : ModulePass(ID) {
      initializeGlobalOptPass(*PassRegistry::getPassRegistry());
    }

    bool runOnModule(Module &M) override;

  private:
    bool OptimizeFunctions(Module &M);
    bool OptimizeGlobalVars(Module &M);
    bool OptimizeGlobalAliases(Module &M);
    bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
    bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
                               const GlobalStatus &GS);
    bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);

    const DataLayout *DL;
    TargetLibraryInfo *TLI;
  };
}

char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
                "Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
                "Global Variable Optimizer", false, false)

ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }

/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
/// as a root?  If so, we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
  // A global variable is a root if it is a pointer, or could plausibly contain
  // a pointer.  There are two challenges; one is that we could have a struct
  // that has an inner member which is a pointer.  We recurse through the type
  // to detect these (up to a point).  The other is that we may actually be a
  // union of a pointer and another type, and so our LLVM type is an integer
  // which gets converted into a pointer, or our type is an [i8 x #] with a
  // pointer potentially contained here.
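  //
  // For example (illustrative only): a global of type { i32, { float, i8* } }
  // counts as a root because a pointer nests inside it, while a global of
  // type [16 x double] does not.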

  if (GV->hasPrivateLinkage())
    return false;

  SmallVector<Type *, 4> Types;
  Types.push_back(cast<PointerType>(GV->getType())->getElementType());

  unsigned Limit = 20;
  do {
    Type *Ty = Types.pop_back_val();
    switch (Ty->getTypeID()) {
      default: break;
      case Type::PointerTyID: return true;
      case Type::ArrayTyID:
      case Type::VectorTyID: {
        SequentialType *STy = cast<SequentialType>(Ty);
        Types.push_back(STy->getElementType());
        break;
      }
      case Type::StructTyID: {
        StructType *STy = cast<StructType>(Ty);
        if (STy->isOpaque()) return true;
        for (StructType::element_iterator I = STy->element_begin(),
                 E = STy->element_end(); I != E; ++I) {
          Type *InnerTy = *I;
          if (isa<PointerType>(InnerTy)) return true;
          if (isa<CompositeType>(InnerTy))
            Types.push_back(InnerTy);
        }
        break;
      }
    }
    if (--Limit == 0) return true;
  } while (!Types.empty());
  return false;
}

/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
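/// For example (a sketch): if the only use of "%p = malloc(...)" is
/// "%q = gep %p, <constant indices>", and the only use of %q is the dead
/// store, then the GEP and the malloc can be removed along with the store.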
static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
  do {
    if (isa<Constant>(V))
      return true;
    if (!V->hasOneUse())
      return false;
    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
        isa<GlobalValue>(V))
      return false;
    if (isAllocationFn(V, TLI))
      return true;

    Instruction *I = cast<Instruction>(V);
    if (I->mayHaveSideEffects())
      return false;
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      if (!GEP->hasAllConstantIndices())
        return false;
    } else if (I->getNumOperands() != 1) {
      return false;
    }

    V = I->getOperand(0);
  } while (1);
}

/// CleanupPointerRootUsers - This GV is a pointer root.  Loop over all users
/// of the global and clean up any that obviously store a value which is not
/// dynamically allocated, since a leak checker would not care about those.
///
static bool CleanupPointerRootUsers(GlobalVariable *GV,
                                    const TargetLibraryInfo *TLI) {
  // A brief explanation of leak checkers.  The goal is to find bugs where
  // pointers are forgotten, causing an accumulating growth in memory
  // usage over time.  The common strategy for leak checkers is to whitelist the
  // memory pointed to by globals at exit.  This is popular because it also
  // solves another problem where the main thread of a C++ program may shut down
  // before other threads that are still expecting to use those globals.  To
  // handle that case, we expect the program may create a singleton and never
  // destroy it.

  bool Changed = false;

  // If Dead[n].first is the only use of a malloc result, we can delete its
  // chain of computation and the store to the global in Dead[n].second.
  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;

  // Constants can't be pointers to dynamically allocated memory.
  for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
       UI != E;) {
    User *U = *UI++;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      Value *V = SI->getValueOperand();
      if (isa<Constant>(V)) {
        Changed = true;
        SI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, SI));
      }
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
      if (isa<Constant>(MSI->getValue())) {
        Changed = true;
        MSI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MSI));
      }
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
      if (MemSrc && MemSrc->isConstant()) {
        Changed = true;
        MTI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
        // Note: inspect the source value directly; MemSrc may be null here.
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MTI));
      }
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (Constant *C = dyn_cast<Constant>(U)) {
      if (isSafeToDestroyConstant(C)) {
        C->destroyConstant();
        // This could have invalidated UI, start over from scratch.
        Dead.clear();
        CleanupPointerRootUsers(GV, TLI);
        return true;
      }
    }
  }

  for (int i = 0, e = Dead.size(); i != e; ++i) {
    if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
      Dead[i].second->eraseFromParent();
      Instruction *I = Dead[i].first;
      do {
        if (isAllocationFn(I, TLI))
          break;
        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
        if (!J)
          break;
        I->eraseFromParent();
        I = J;
      } while (1);
      I->eraseFromParent();
    }
  }

  return Changed;
}

/// CleanupConstantGlobalUsers - We just marked GV constant.  Loop over all
/// users of the global, cleaning up the obvious ones.  This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft.  This
/// returns true if it made a change.
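/// For instance (a sketch): once @G is marked constant with initializer
/// i32 42, a "load i32* @G" is replaced by "i32 42", and any store to @G
/// (which must be unreachable or redundant) is deleted.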
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
                                       const DataLayout *DL,
                                       TargetLibraryInfo *TLI) {
  bool Changed = false;
  // Note that we need to use a weak value handle for the worklist items. When
  // we delete a constant array, we may also be holding a pointer to one of its
  // elements (or an element of one of its elements if we're dealing with an
  // array of arrays) in the worklist.
  SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end());
  while (!WorkList.empty()) {
    Value *UV = WorkList.pop_back_val();
    if (!UV)
      continue;

    User *U = cast<User>(UV);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (Init) {
        // Replace the load with the initializer.
        LI->replaceAllUsesWith(Init);
        LI->eraseFromParent();
        Changed = true;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // Store must be unreachable or storing Init into the global.
      SI->eraseFromParent();
      Changed = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->getOpcode() == Instruction::GetElementPtr) {
        Constant *SubInit = nullptr;
        if (Init)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
        Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
      } else if ((CE->getOpcode() == Instruction::BitCast &&
                  CE->getType()->isPointerTy()) ||
                 CE->getOpcode() == Instruction::AddrSpaceCast) {
        // Pointer cast, delete any stores and memsets to the global.
        Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI);
      }

      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // Do not transform "gepinst (gep constexpr (GV))" here, because forming
      // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
      // and will invalidate our notion of what Init is.
      Constant *SubInit = nullptr;
      if (!isa<ConstantExpr>(GEP->getOperand(0))) {
        ConstantExpr *CE =
          dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI));
        if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);

        // If the initializer is an all-null value and we have an inbounds GEP,
        // we already know what the result of any load from that GEP is.
        // TODO: Handle splats.
        if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
          SubInit = Constant::getNullValue(GEP->getType()->getElementType());
      }
      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI);

      if (GEP->use_empty()) {
        GEP->eraseFromParent();
        Changed = true;
      }
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
      if (MI->getRawDest() == V) {
        MI->eraseFromParent();
        Changed = true;
      }

    } else if (Constant *C = dyn_cast<Constant>(U)) {
      // If we have a chain of dead constantexprs or other things dangling from
      // us, and if they are all dead, nuke them without remorse.
      if (isSafeToDestroyConstant(C)) {
        C->destroyConstant();
        CleanupConstantGlobalUsers(V, Init, DL, TLI);
        return true;
      }
    }
  }
  return Changed;
}

/// isSafeSROAElementUse - Return true if the specified instruction is a safe
/// user of a derived expression from a global that we want to SROA.
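/// Loosely, a safe use is a load, a store *to* (not of) the derived pointer,
/// or a "gep P, 0, ..." (at least three operands) whose own uses are safe in
/// the same sense.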
static bool isSafeSROAElementUse(Value *V) {
  // We might have a dead and dangling constant hanging off of here.
  if (Constant *C = dyn_cast<Constant>(V))
    return isSafeToDestroyConstant(C);

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Loads are ok.
  if (isa<LoadInst>(I)) return true;

  // Stores *to* the pointer are ok.
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getOperand(0) != V;

  // Otherwise, it must be a GEP.
  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
  if (!GEPI) return false;

  if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
      !cast<Constant>(GEPI->getOperand(1))->isNullValue())
    return false;

  for (User *U : GEPI->users())
    if (!isSafeSROAElementUse(U))
      return false;
  return true;
}


/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
/// Look at it and its uses and decide whether it is safe to SROA this global.
///
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
  if (!isa<GetElementPtrInst>(U) &&
      (!isa<ConstantExpr>(U) ||
       cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
    return false;

  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
  // don't like < 3 operand CE's, and we don't like non-constant integer
  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
  // value of C.
  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
      !cast<Constant>(U->getOperand(1))->isNullValue() ||
      !isa<ConstantInt>(U->getOperand(2)))
    return false;

  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
  ++GEPI;  // Skip over the pointer index.

  // If this is a use of an array allocation, do a bit more checking for sanity.
  if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
    uint64_t NumElements = AT->getNumElements();
    ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));

    // Check to make sure that index falls within the array.  If not,
    // something funny is going on, so we won't do the optimization.
    //
    if (Idx->getZExtValue() >= NumElements)
      return false;

    // We cannot scalar repl this level of the array unless any array
    // sub-indices are in-range constants.  In particular, consider:
    // A[0][i].  We cannot know that the user isn't doing invalid things like
    // allowing i to index an out-of-range subscript that accesses A[1].
    //
    // Scalar replacing *just* the outer index of the array is probably not
    // going to be a win anyway, so just give up.
    for (++GEPI; // Skip array index.
         GEPI != E;
         ++GEPI) {
      uint64_t NumElements;
      if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
        NumElements = SubArrayTy->getNumElements();
      else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
        NumElements = SubVectorTy->getNumElements();
      else {
        assert((*GEPI)->isStructTy() &&
               "Indexed GEP type is not array, vector, or struct!");
        continue;
      }

      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
      if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
        return false;
    }
  }

  for (User *UU : U->users())
    if (!isSafeSROAElementUse(UU))
      return false;

  return true;
}

/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
/// is safe for us to perform this transformation.
///
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
  for (User *U : GV->users())
    if (!IsUserOfGlobalSafeForSRA(U, GV))
      return false;

  return true;
}


/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
/// variable.  This opens the door for other optimizations by exposing the
/// behavior of the program in a more fine-grained way.  We have determined that
/// this transformation is safe already.  We return the first global variable we
/// insert so that the caller can reprocess it.
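/// For example (a sketch):
///   @g = internal global { i32, float } { i32 1, float 2.0 }
/// is split into
///   @g.0 = internal global i32 1
///   @g.1 = internal global float 2.0
/// and each "gep @g, 0, N, ..." use is rewritten in terms of @g.N.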
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
  // Make sure this global only has simple uses that we can SRA.
  if (!GlobalUsersSafeToSRA(GV))
    return nullptr;

  assert(GV->hasLocalLinkage() && !GV->isConstant());
  Constant *Init = GV->getInitializer();
  Type *Ty = Init->getType();

  std::vector<GlobalVariable*> NewGlobals;
  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();

  // Get the alignment of the global, either explicit or target-specific.
  unsigned StartAlignment = GV->getAlignment();
  if (StartAlignment == 0)
    StartAlignment = DL.getABITypeAlignment(GV->getType());

  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    NewGlobals.reserve(STy->getNumElements());
    const StructLayout &Layout = *DL.getStructLayout(STy);
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");
      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->getThreadLocalMode(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      uint64_t FieldOffset = Layout.getElementOffset(i);
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
      if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i)))
        NGV->setAlignment(NewAlign);
    }
  } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    unsigned NumElements = 0;
    if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
      NumElements = ATy->getNumElements();
    else
      NumElements = cast<VectorType>(STy)->getNumElements();

    if (NumElements > 16 && GV->hasNUsesOrMore(16))
      return nullptr; // It's not worth it.
    NewGlobals.reserve(NumElements);

    uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType());
    unsigned EltAlign = DL.getABITypeAlignment(STy->getElementType());
    for (unsigned i = 0, e = NumElements; i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");

      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->getThreadLocalMode(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
      if (NewAlign > EltAlign)
        NGV->setAlignment(NewAlign);
    }
  }

  if (NewGlobals.empty())
    return nullptr;

  DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);

  Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));

  // Loop over all of the uses of the global, replacing the constantexpr geps
  // with smaller constantexpr geps or direct references.
  while (!GV->use_empty()) {
    User *GEP = GV->user_back();
    assert(((isa<ConstantExpr>(GEP) &&
             cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");

    // Ignore operand 1 (the first index), which has to be zero or else the
    // program is quite broken (undefined).  Get operand 2, which is the
    // structure or array index.
    unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
    if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.

    Value *NewPtr = NewGlobals[Val];

    // Form a shorter GEP if needed.
    if (GEP->getNumOperands() > 3) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
        SmallVector<Constant*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
          Idxs.push_back(CE->getOperand(i));
        NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
      } else {
        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
        SmallVector<Value*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
          Idxs.push_back(GEPI->getOperand(i));
        NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
                                           GEPI->getName()+"."+Twine(Val),GEPI);
      }
    }
    GEP->replaceAllUsesWith(NewPtr);

    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
      GEPI->eraseFromParent();
    else
      cast<ConstantExpr>(GEP)->destroyConstant();
  }

  // Delete the old global, now that it is dead.
  Globals.erase(GV);
  ++NumSRA;

  // Loop over the new globals array deleting any globals that are obviously
  // dead.  This can arise due to scalarization of a structure or an array that
  // has elements that are dead.
  unsigned FirstGlobal = 0;
  for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
    if (NewGlobals[i]->use_empty()) {
      Globals.erase(NewGlobals[i]);
      if (FirstGlobal == i) ++FirstGlobal;
    }

  return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
}

/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
/// value will trap if the value is dynamically null.  PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
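/// For example, "load i32* %p" and "call void %p()" trap when %p is null, so
/// they qualify; "store i8* %p, i8** @q" stores the value itself and does not.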
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
  for (const User *U : V->users())
    if (isa<LoadInst>(U)) {
      // Will trap.
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Storing the value.
      }
    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
      if (CI->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
      if (II->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
      // If we've already seen this phi node, ignore it, it has already been
      // checked.
      if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
        return false;
    } else if (isa<ICmpInst>(U) &&
               isa<ConstantPointerNull>(U->getOperand(1))) {
      // Ignore icmp X, null
    } else {
      //cerr << "NONTRAPPING USE: " << *U;
      return false;
    }

  return true;
}

/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
/// from GV will trap if the loaded value is null.  Note that this also permits
/// comparisons of the loaded value against null, as a special case.
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
  for (const User *U : GV->users())
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      SmallPtrSet<const PHINode*, 8> PHIs;
      if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
        return false;
    } else if (isa<StoreInst>(U)) {
      // Ignore stores to the global.
    } else {
      // We don't know or understand this user, bail out.
      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
      return false;
    }
  return true;
}

static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  bool Changed = false;
  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallSite CS(I);
      if (CS.getCalledValue() == V) {
        // Calling through the pointer!  Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        CS.setCalledFunction(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
          if (CS.getArgument(i) == V) {
            PassedAsArg = true;
            CS.setArgument(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also.  Be careful to not invalidate UI!
          UI = V->user_begin();
        }
      }
    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
                                ConstantExpr::getCast(CI->getOpcode(),
                                                      NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant*, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
                          ConstantExpr::getGetElementPtr(NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}


/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
/// value stored into it.  If there are uses of the loaded value that would trap
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null value, so we can optimize away the load.
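/// E.g. if the only value ever stored to @G is @F, a call through a pointer
/// loaded from @G can only execute when @G actually holds @F, so the call can
/// be devirtualized to call @F directly.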
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
                                            const DataLayout *DL,
                                            TargetLibraryInfo *TLI) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of the stored value.
  for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end();
       GUI != E;) {
    User *GlobalUser = *GUI++;
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
              isa<BitCastInst>(GlobalUser) ||
              isa<GetElementPtrInst>(GlobalUser)) &&
             "Only expect loads and stores!");
    }
  }

  if (Changed) {
    DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    if (isLeakCheckerRoot(GV)) {
      Changed |= CleanupPointerRootUsers(GV, TLI);
    } else {
      Changed = true;
      CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
    }
    if (GV->use_empty()) {
      DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
      Changed = true;
      GV->eraseFromParent();
      ++NumDeleted;
    }
  }
  return Changed;
}

/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout *DL,
                                TargetLibraryInfo *TLI) {
  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        I->eraseFromParent();
      }
}

/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
/// variable, and transforms the program as if it always contained the result of
/// the specified malloc.  Because it is always the result of the specified
/// malloc, there is no reason to actually DO the malloc.  Instead, turn the
/// malloc into a global, and turn any loads of GV into uses of the new global.
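/// A sketch of the rewrite (global names as generated below):
///   @G = internal global i32* null
///   %m = call i8* @malloc(i64 4)   ; the single store into @G
/// becomes
///   @G.body = internal global i32 undef
/// with loads of @G replaced by @G.body, plus an @G.init flag if the loaded
/// pointer was ever compared against null.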
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                                     CallInst *CI,
                                                     Type *AllocTy,
                                                     ConstantInt *NElements,
                                                     const DataLayout *DL,
                                                     TargetLibraryInfo *TLI) {
  DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');

  Type *GlobalType;
  if (NElements->getZExtValue() == 1)
    GlobalType = AllocTy;
  else
    // If we have an array allocation, the global variable holds an array.
    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());

  // Create the new global variable.  The contents of the malloc'd memory are
  // undefined, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
                                             GlobalType, false,
                                             GlobalValue::InternalLinkage,
                                             UndefValue::get(GlobalType),
                                             GV->getName()+".body",
                                             GV,
                                             GV->getThreadLocalMode());

  // If there are bitcast users of the malloc (which is typical, usually we have
  // a malloc + bitcast) then replace them with uses of the new global.  Update
  // other users to use the global as well.
  BitCastInst *TheBC = nullptr;
  while (!CI->use_empty()) {
    Instruction *User = cast<Instruction>(CI->user_back());
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (BCI->getType() == NewGV->getType()) {
        BCI->replaceAllUsesWith(NewGV);
        BCI->eraseFromParent();
      } else {
        BCI->setOperand(0, NewGV);
      }
    } else {
      if (!TheBC)
        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
      User->replaceUsesOfWith(CI, TheBC);
    }
  }

  Constant *RepValue = NewGV;
  if (NewGV->getType() != GV->getType()->getElementType())
    RepValue = ConstantExpr::getBitCast(RepValue,
                                        GV->getType()->getElementType());

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool =
    new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
                       GlobalValue::InternalLinkage,
                       ConstantInt::getFalse(GV->getContext()),
                       GV->getName()+".init", GV->getThreadLocalMode());
  bool InitBoolUsed = false;

  // Loop over all uses of GV, processing them in turn.
  while (!GV->use_empty()) {
    if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
      // The global is initialized when the store to it occurs.
      new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
                    SI->getOrdering(), SI->getSynchScope(), SI);
      SI->eraseFromParent();
      continue;
    }

    LoadInst *LI = cast<LoadInst>(GV->user_back());
    while (!LI->use_empty()) {
      Use &LoadUse = *LI->use_begin();
      ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
      if (!ICI) {
        LoadUse = RepValue;
        continue;
      }

      // Replace the cmp X, 0 with a use of the bool value.
      // Sink the load to where the compare was, if atomic rules allow us to.
      Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
                               LI->getOrdering(), LI->getSynchScope(),
                               LI->isUnordered() ? (Instruction*)ICI : LI);
      InitBoolUsed = true;
      switch (ICI->getPredicate()) {
      default: llvm_unreachable("Unknown ICmp Predicate!");
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_SLT:   // X < null -> always false
        LV = ConstantInt::getFalse(GV->getContext());
        break;
      case ICmpInst::ICMP_ULE:
      case ICmpInst::ICMP_SLE:
      case ICmpInst::ICMP_EQ:
        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
        break;
      case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_UGE:
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_UGT:
      case ICmpInst::ICMP_SGT:
        break;  // no change.
      }
      ICI->replaceAllUsesWith(LV);
      ICI->eraseFromParent();
    }
    LI->eraseFromParent();
  }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty())  // Delete initializations
      cast<StoreInst>(InitBool->user_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->getGlobalList().insert(GV, InitBool);

  // Now the GV is dead, nuke it and the malloc.
  GV->eraseFromParent();
  CI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
  // constant prop them.  This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  ConstantPropUsersOf(NewGV, DL, TLI);
  if (RepValue != NewGV)
    ConstantPropUsersOf(RepValue, DL, TLI);

  return NewGV;
}

/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
/// to make sure that there are no complex uses of V.  We permit simple things
/// like dereferencing the pointer, but not storing through the address, unless
/// it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
                                                      const GlobalVariable *GV,
                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
  for (const User *U : V->users()) {
    const Instruction *Inst = cast<Instruction>(U);

    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
      continue; // Fine, ignore.
    }

    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
        return false;  // Storing the pointer itself... bad.
      continue; // Otherwise, storing through it, or storing into GV... fine.
    }

    // Must index into the array and into the struct.
    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
        return false;
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
      // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
      // cycles.
      if (PHIs.insert(PN))
        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
          return false;
      continue;
    }

    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
        return false;
      continue;
    }

    return false;
  }
  return true;
}

/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
/// somewhere.  Transform all uses of the allocation into loads from the
/// global and uses of the resultant pointer.  Further, delete the store into
/// GV.  This assumes that the value passes the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
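/// For instance (a sketch): a use "%f = gep %alloc, 0, 1" becomes
/// "%v = load @G; %f = gep %v, 0, 1" once the store of %alloc into @G has
/// been removed.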
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
                                          GlobalVariable *GV) {
  while (!Alloc->use_empty()) {
    Instruction *U = cast<Instruction>(*Alloc->user_begin());
    Instruction *InsertPt = U;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // If this is the store of the allocation into the global, remove it.
      if (SI->getOperand(1) == GV) {
        SI->eraseFromParent();
        continue;
      }
    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
      // Insert the load in the corresponding predecessor, not right before the
      // PHI.
      InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator();
    } else if (isa<BitCastInst>(U)) {
      // Must be bitcast between the malloc and store to initialize the global.
      ReplaceUsesOfMallocWithGlobal(U, GV);
      U->eraseFromParent();
      continue;
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // If this is a "GEP bitcast" and the user is a store to the global, then
      // just process it as a bitcast.
      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back()))
          if (SI->getOperand(1) == GV) {
            // Must be bitcast GEP between the malloc and store to initialize
            // the global.
            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
            GEPI->eraseFromParent();
            continue;
          }
    }

    // Insert a load from the global, and use it instead of the malloc.
    Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
    U->replaceUsesOfWith(Alloc, NL);
  }
}

/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
/// of a load) are simple enough to perform heap SRA on.  This permits GEP's
/// that index through the array and struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
  // We permit two users of the load: setcc comparing against the null
  // pointer, and a getelementptr of a specific form.
  for (const User *U : V->users()) {
    const Instruction *UI = cast<Instruction>(U);

    // Comparison against null is ok.
    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
        return false;
      continue;
    }

    // getelementptr is also ok, but only a simple form.
    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
      // Must index into the array and into the struct.
      if (GEPI->getNumOperands() < 3)
        return false;

      // Otherwise the GEP is ok.
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
      if (!LoadUsingPHIsPerLoad.insert(PN))
        // This means some phi nodes are dependent on each other.
        // Avoid infinite looping!
        return false;
      if (!LoadUsingPHIs.insert(PN))
        // If we have already analyzed this PHI, then it is safe.
        continue;

      // Make sure all uses of the PHI are simple enough to transform.
      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
        return false;

      continue;
    }

    // Otherwise we don't know what this is, not ok.
    return false;
  }

  return true;
}


/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
/// GV are simple enough to perform HeapSRA, return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
                                                    Instruction *StoredVal) {
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
  for (const User *U : GV->users())
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
                                          LoadUsingPHIsPerLoad))
        return false;
      LoadUsingPHIsPerLoad.clear();
    }

  // If we reach here, we know that all uses of the loads and transitive uses
  // (through PHI nodes) are simple enough to transform.  However, we don't know
  // that all inputs to the PHI nodes are in the same equivalence sets.
  // Check to verify that all operands of the PHIs are either PHIs that can be
  // transformed, loads from GV, or StoredVal itself.
  for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
       , E = LoadUsingPHIs.end(); I != E; ++I) {
    const PHINode *PN = *I;
    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
      Value *InVal = PN->getIncomingValue(op);

      // PHI of the stored value itself is ok.
      if (InVal == StoredVal) continue;

      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
        // One of the PHIs in our set is (optimistically) ok.
        if (LoadUsingPHIs.count(InPN))
          continue;
        return false;
      }

      // Load from GV is ok.
      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
        if (LI->getOperand(0) == GV)
          continue;

      // UNDEF? NULL?

      // Anything else is rejected.
      return false;
    }
  }

  return true;
}

static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];

  if (FieldNo >= FieldVals.size())
    FieldVals.resize(FieldNo+1);

  // If we already have this value, just reuse the previously scalarized
  // version.
  if (Value *FieldVal = FieldVals[FieldNo])
    return FieldVal;

  // Depending on what instruction this is, we have several cases.
  Value *Result;
  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
    // This is a scalarized version of the load from the global.  Just create
    // a new Load of the scalarized global.
    Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
                                           InsertedScalarizedValues,
                                           PHIsToRewrite),
                          LI->getName()+".f"+Twine(FieldNo), LI);
  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
    // PN's type is pointer to struct.  Make a new PHI of pointer to struct
    // field.

    PointerType *PTy = cast<PointerType>(PN->getType());
    StructType *ST = cast<StructType>(PTy->getElementType());

    unsigned AS = PTy->getAddressSpace();
    PHINode *NewPN =
      PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
                     PN->getNumIncomingValues(),
                     PN->getName()+".f"+Twine(FieldNo), PN);
    Result = NewPN;
    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
  } else {
    llvm_unreachable("Unknown usable value");
  }

  return FieldVals[FieldNo] = Result;
}

/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
/// the load, rewrite the derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
             DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  // If this is a comparison against null, handle it.
  if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
    assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
    // If we have a setcc of the loaded pointer, we can use a setcc of any
    // field.
    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
                                   InsertedScalarizedValues, PHIsToRewrite);

    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
                              Constant::getNullValue(NPtr->getType()),
                              SCI->getName());
    SCI->replaceAllUsesWith(New);
    SCI->eraseFromParent();
    return;
  }

  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
    assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
           && "Unexpected GEPI!");

    // Load the pointer for this field.
    unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
                                     InsertedScalarizedValues, PHIsToRewrite);

    // Create the new GEP idx vector.
    SmallVector<Value*, 8> GEPIdx;
    GEPIdx.push_back(GEPI->getOperand(1));
    GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());

    Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
                                             GEPI->getName(), GEPI);
    GEPI->replaceAllUsesWith(NGEPI);
    GEPI->eraseFromParent();
    return;
  }

  // Recursively transform the users of PHI nodes.  This will lazily create the
  // PHIs that are needed for individual elements.  Keep track of what PHIs we
  // see in InsertedScalarizedValues so that we don't get infinite loops (very
  // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
  // already been seen first by another load, so its uses have already been
  // processed.
  PHINode *PN = cast<PHINode>(LoadUser);
  if (!InsertedScalarizedValues.insert(std::make_pair(PN,
                                              std::vector<Value*>())).second)
    return;

  // If this is the first time we've seen this PHI, recursively process all
  // users.
  for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }
}

/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
/// is a value loaded from the global.  Eliminate all uses of Ptr, making them
/// use FieldGlobals instead.  All uses of loaded values satisfy
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }

  if (Load->use_empty()) {
    Load->eraseFromParent();
    InsertedScalarizedValues.erase(Load);
  }
}

/// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
/// it up into multiple allocations of arrays of the fields.
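/// A sketch: for a malloc of "n x { i32, double }" stored into @G, we emit
///   @G.f0 = internal global i32* null      ; filled from "malloc n x i32"
///   @G.f1 = internal global double* null   ; filled from "malloc n x double"
/// and rewrite "gep Ptr, i, 0" / "gep Ptr, i, 1" in terms of the field
/// globals, with cleanup code that frees the survivors if any malloc fails.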
1274static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
1275                                            Value *NElems, const DataLayout *DL,
1276                                            const TargetLibraryInfo *TLI) {
1277  DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
1278  Type *MAT = getMallocAllocatedType(CI, TLI);
1279  StructType *STy = cast<StructType>(MAT);
1280
1281  // There is guaranteed to be at least one use of the malloc (storing
1282  // it into GV).  If there are other uses, change them to be uses of
1283  // the global to simplify later code.  This also deletes the store
1284  // into GV.
1285  ReplaceUsesOfMallocWithGlobal(CI, GV);
1286
1287  // Okay, at this point, there are no users of the malloc.  Insert N
1288  // new mallocs at the same place as CI, and N globals.
1289  std::vector<Value*> FieldGlobals;
1290  std::vector<Value*> FieldMallocs;
1291
1292  unsigned AS = GV->getType()->getPointerAddressSpace();
1293  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
1294    Type *FieldTy = STy->getElementType(FieldNo);
1295    PointerType *PFieldTy = PointerType::get(FieldTy, AS);
1296
1297    GlobalVariable *NGV =
1298      new GlobalVariable(*GV->getParent(),
1299                         PFieldTy, false, GlobalValue::InternalLinkage,
1300                         Constant::getNullValue(PFieldTy),
1301                         GV->getName() + ".f" + Twine(FieldNo), GV,
1302                         GV->getThreadLocalMode());
1303    FieldGlobals.push_back(NGV);
1304
1305    unsigned TypeSize = DL->getTypeAllocSize(FieldTy);
1306    if (StructType *ST = dyn_cast<StructType>(FieldTy))
1307      TypeSize = DL->getStructLayout(ST)->getSizeInBytes();
1308    Type *IntPtrTy = DL->getIntPtrType(CI->getType());
1309    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
1310                                        ConstantInt::get(IntPtrTy, TypeSize),
1311                                        NElems, nullptr,
1312                                        CI->getName() + ".f" + Twine(FieldNo));
1313    FieldMallocs.push_back(NMI);
1314    new StoreInst(NMI, NGV, CI);
1315  }
1316
1317  // The tricky aspect of this transformation is handling the case when malloc
1318  // fails.  In the original code, malloc failing would set the result pointer
1319  // of malloc to null.  In this case, some mallocs could succeed and others
1320  // could fail.  As such, we emit code that looks like this:
1321  //    F0 = malloc(field0)
1322  //    F1 = malloc(field1)
1323  //    F2 = malloc(field2)
1324  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
1325  //      if (F0) { free(F0); F0 = 0; }
1326  //      if (F1) { free(F1); F1 = 0; }
1327  //      if (F2) { free(F2); F2 = 0; }
1328  //    }
1329  // The malloc can also fail if its argument is too large.
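  // Seed the running "or" with a signed comparison of the size argument
  // against zero: a negative value indicates the requested size overflowed
  // when it was computed, which is the argument-too-large case noted above.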
1330  Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
1331  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
1332                                  ConstantZero, "isneg");
1333  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
1334    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
1335                             Constant::getNullValue(FieldMallocs[i]->getType()),
1336                               "isnull");
1337    RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
1338  }
1339
1340  // Split the basic block at the old malloc.
1341  BasicBlock *OrigBB = CI->getParent();
1342  BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
1343
  // Create the block to check the first condition.  Put all these blocks at
  // the end of the function as they are unlikely to be executed.
1346  BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
1347                                                "malloc_ret_null",
1348                                                OrigBB->getParent());
1349
1350  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
1351  // branch on RunningOr.
1352  OrigBB->getTerminator()->eraseFromParent();
1353  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
1354
1355  // Within the NullPtrBlock, we need to emit a comparison and branch for each
1356  // pointer, because some may be null while others are not.
1357  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1358    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
1359    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
1360                              Constant::getNullValue(GVVal->getType()));
1361    BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
1362                                               OrigBB->getParent());
1363    BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
1364                                               OrigBB->getParent());
1365    Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
1366                                         Cmp, NullPtrBlock);
1367
1368    // Fill in FreeBlock.
1369    CallInst::CreateFree(GVVal, BI);
1370    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
1371                  FreeBlock);
1372    BranchInst::Create(NextBlock, FreeBlock);
1373
1374    NullPtrBlock = NextBlock;
1375  }
1376
1377  BranchInst::Create(ContBB, NullPtrBlock);
1378
1379  // CI is no longer needed, remove it.
1380  CI->eraseFromParent();
1381
  // InsertedScalarizedValues - As we process loads, if we can't immediately
  // update all uses of the load, keep track of the scalarized values that
  // were inserted for a given load.
1385  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
1386  InsertedScalarizedValues[GV] = FieldGlobals;
1387
1388  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
1389
1390  // Okay, the malloc site is completely handled.  All of the uses of GV are now
1391  // loads, and all uses of those loads are simple.  Rewrite them to use loads
1392  // of the per-field globals instead.
1393  for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) {
1394    Instruction *User = cast<Instruction>(*UI++);
1395
1396    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1397      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
1398      continue;
1399    }
1400
1401    // Must be a store of null.
1402    StoreInst *SI = cast<StoreInst>(User);
1403    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
1404           "Unexpected heap-sra user!");
1405
1406    // Insert a store of null into each global.
1407    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
1408      PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
1409      Constant *Null = Constant::getNullValue(PT->getElementType());
1410      new StoreInst(Null, FieldGlobals[i], SI);
1411    }
1412    // Erase the original store.
1413    SI->eraseFromParent();
1414  }
1415
1416  // While we have PHIs that are interesting to rewrite, do it.
1417  while (!PHIsToRewrite.empty()) {
1418    PHINode *PN = PHIsToRewrite.back().first;
1419    unsigned FieldNo = PHIsToRewrite.back().second;
1420    PHIsToRewrite.pop_back();
1421    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
    assert(FieldPN->getNumIncomingValues() == 0 &&
           "Already processed this phi");
1423
1424    // Add all the incoming values.  This can materialize more phis.
1425    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1426      Value *InVal = PN->getIncomingValue(i);
1427      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
1428                               PHIsToRewrite);
1429      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
1430    }
1431  }
1432
1433  // Drop all inter-phi links and any loads that made it this far.
1434  for (DenseMap<Value*, std::vector<Value*> >::iterator
1435       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1436       I != E; ++I) {
1437    if (PHINode *PN = dyn_cast<PHINode>(I->first))
1438      PN->dropAllReferences();
1439    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1440      LI->dropAllReferences();
1441  }
1442
1443  // Delete all the phis and loads now that inter-references are dead.
1444  for (DenseMap<Value*, std::vector<Value*> >::iterator
1445       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
1446       I != E; ++I) {
1447    if (PHINode *PN = dyn_cast<PHINode>(I->first))
1448      PN->eraseFromParent();
1449    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
1450      LI->eraseFromParent();
1451  }
1452
1453  // The old global is now dead, remove it.
1454  GV->eraseFromParent();
1455
1456  ++NumHeapSRA;
1457  return cast<GlobalVariable>(FieldGlobals[0]);
1458}
1459
/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
/// pointer global variable with a single value stored into it that is a malloc
/// or a cast of a malloc.
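///
/// The pattern handled looks roughly like this (illustrative IR):
///   @g = internal global i32* null
///   ...
///   %mem = call i8* @malloc(i64 %size)
///   %ptr = bitcast i8* %mem to i32*
///   store i32* %ptr, i32** @g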
1463static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
1464                                               CallInst *CI,
1465                                               Type *AllocTy,
1466                                               AtomicOrdering Ordering,
1467                                               Module::global_iterator &GVI,
1468                                               const DataLayout *DL,
1469                                               TargetLibraryInfo *TLI) {
1470  if (!DL)
1471    return false;
1472
1473  // If this is a malloc of an abstract type, don't touch it.
1474  if (!AllocTy->isSized())
1475    return false;
1476
1477  // We can't optimize this global unless all uses of it are *known* to be
1478  // of the malloc value, not of the null initializer value (consider a use
1479  // that compares the global's value against zero to see if the malloc has
1480  // been reached).  To do this, we check to see if all uses of the global
1481  // would trap if the global were null: this proves that they must all
1482  // happen after the malloc.
1483  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
1484    return false;
1485
  // We can't optimize this if the malloc itself is used in a complex way,
  // for example, being stored into multiple globals.  This allows the
  // malloc to be stored into the specified global, loaded, icmp'd, and
  // GEP'd.  These are all things we can transform to use the global
  // instead.
1491  SmallPtrSet<const PHINode*, 8> PHIs;
1492  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
1493    return false;
1494
1495  // If we have a global that is only initialized with a fixed size malloc,
1496  // transform the program to use global memory instead of malloc'd memory.
1497  // This eliminates dynamic allocation, avoids an indirection accessing the
1498  // data, and exposes the resultant global to further GlobalOpt.
  // We cannot optimize the malloc if we cannot determine its array size.
1500  Value *NElems = getMallocArraySize(CI, DL, TLI, true);
1501  if (!NElems)
1502    return false;
1503
1504  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
1505    // Restrict this transformation to only working on small allocations
1506    // (2048 bytes currently), as we don't want to introduce a 16M global or
1507    // something.
1508    if (NElements->getZExtValue() * DL->getTypeAllocSize(AllocTy) < 2048) {
1509      GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
1510      return true;
1511    }
1512
1513  // If the allocation is an array of structures, consider transforming this
1514  // into multiple malloc'd arrays, one for each field.  This is basically
1515  // SRoA for malloc'd memory.
1516
1517  if (Ordering != NotAtomic)
1518    return false;
1519
1520  // If this is an allocation of a fixed size array of structs, analyze as a
1521  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
1522  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
1523    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
1524      AllocTy = AT->getElementType();
1525
1526  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
1527  if (!AllocSTy)
1528    return false;
1529
  // If the structure has an unreasonable number of fields, leave it
  // alone.
1532  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
1533      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
1534
1535    // If this is a fixed size array, transform the Malloc to be an alloc of
1536    // structs.  malloc [100 x struct],1 -> malloc struct, 100
1537    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
1538      Type *IntPtrTy = DL->getIntPtrType(CI->getType());
1539      unsigned TypeSize = DL->getStructLayout(AllocSTy)->getSizeInBytes();
1540      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
1541      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
1542      Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
1543                                                   AllocSize, NumElements,
1544                                                   nullptr, CI->getName());
1545      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
1546      CI->replaceAllUsesWith(Cast);
1547      CI->eraseFromParent();
1548      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
1549        CI = cast<CallInst>(BCI->getOperand(0));
1550      else
1551        CI = cast<CallInst>(Malloc);
1552    }
1553
1554    GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true),
1555                               DL, TLI);
1556    return true;
1557  }
1558
1559  return false;
1560}
1561
/// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
/// that only one value (besides its initializer) is ever stored to the global.
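/// Two cases are handled below: the stored value is a constant, in which case
/// trapping uses of loads from the global may be optimized away, or it is a
/// malloc (possibly hidden behind a cast), in which case the heap-to-global
/// transformations above may apply.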
1564static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
1565                                     AtomicOrdering Ordering,
1566                                     Module::global_iterator &GVI,
1567                                     const DataLayout *DL,
1568                                     TargetLibraryInfo *TLI) {
1569  // Ignore no-op GEPs and bitcasts.
1570  StoredOnceVal = StoredOnceVal->stripPointerCasts();
1571
1572  // If we are dealing with a pointer global that is initialized to null and
1573  // only has one (non-null) value stored into it, then we can optimize any
1574  // users of the loaded value (often calls and loads) that would trap if the
1575  // value was null.
1576  if (GV->getInitializer()->getType()->isPointerTy() &&
1577      GV->getInitializer()->isNullValue()) {
1578    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
1579      if (GV->getInitializer()->getType() != SOVC->getType())
1580        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
1581
1582      // Optimize away any trapping uses of the loaded value.
1583      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI))
1584        return true;
1585    } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
1586      Type *MallocType = getMallocAllocatedType(CI, TLI);
1587      if (MallocType &&
1588          TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
1589                                             DL, TLI))
1590        return true;
1591    }
1592  }
1593
1594  return false;
1595}
1596
1597/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
1598/// two values ever stored into GV are its initializer and OtherVal.  See if we
1599/// can shrink the global into a boolean and select between the two values
1600/// whenever it is used.  This exposes the values to other scalar optimizations.
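///
/// For example (illustrative IR, assuming an initializer of 0 and an OtherVal
/// of 42):
///   @g = internal global i32 0
///   store i32 42, i32* @g   -->   store i1 true, i1* @g.b
///   %v = load i32* @g       -->   %b = load i1* @g.b
///                                 %v = select i1 %b, i32 42, i32 0
/// (When the initializer is 0 and the other value is 1, a zext of the bool is
/// emitted instead of a select.)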
1601static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
1602  Type *GVElType = GV->getType()->getElementType();
1603
1604  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
1605  // an FP value, pointer or vector, don't do this optimization because a select
1606  // between them is very expensive and unlikely to lead to later
1607  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
1608  // where v1 and v2 both require constant pool loads, a big loss.
1609  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
1610      GVElType->isFloatingPointTy() ||
1611      GVElType->isPointerTy() || GVElType->isVectorTy())
1612    return false;
1613
  // Walk the use list of the global, checking that all uses are loads or
  // stores.  If there is anything else, bail out.
1616  for (User *U : GV->users())
1617    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
1618      return false;
1619
1620  DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV);
1621
1622  // Create the new global, initializing it to false.
1623  GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
1624                                             false,
1625                                             GlobalValue::InternalLinkage,
1626                                        ConstantInt::getFalse(GV->getContext()),
1627                                             GV->getName()+".b",
1628                                             GV->getThreadLocalMode(),
1629                                             GV->getType()->getAddressSpace());
1630  GV->getParent()->getGlobalList().insert(GV, NewGV);
1631
1632  Constant *InitVal = GV->getInitializer();
1633  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
1634         "No reason to shrink to bool!");
1635
1636  // If initialized to zero and storing one into the global, we can use a cast
1637  // instead of a select to synthesize the desired value.
1638  bool IsOneZero = false;
1639  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
1640    IsOneZero = InitVal->isNullValue() && CI->isOne();
1641
1642  while (!GV->use_empty()) {
1643    Instruction *UI = cast<Instruction>(GV->user_back());
1644    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
1645      // Change the store into a boolean store.
1646      bool StoringOther = SI->getOperand(0) == OtherVal;
1647      // Only do this if we weren't storing a loaded value.
1648      Value *StoreVal;
1649      if (StoringOther || SI->getOperand(0) == InitVal) {
1650        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
1651                                    StoringOther);
1652      } else {
1653        // Otherwise, we are storing a previously loaded copy.  To do this,
1654        // change the copy from copying the original value to just copying the
1655        // bool.
1656        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
1657
1658        // If we've already replaced the input, StoredVal will be a cast or
1659        // select instruction.  If not, it will be a load of the original
1660        // global.
1661        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
1662          assert(LI->getOperand(0) == GV && "Not a copy!");
1663          // Insert a new load, to preserve the saved value.
1664          StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
1665                                  LI->getOrdering(), LI->getSynchScope(), LI);
1666        } else {
1667          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
1668                 "This is not a form that we understand!");
1669          StoreVal = StoredVal->getOperand(0);
1670          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
1671        }
1672      }
1673      new StoreInst(StoreVal, NewGV, false, 0,
1674                    SI->getOrdering(), SI->getSynchScope(), SI);
1675    } else {
1676      // Change the load into a load of bool then a select.
1677      LoadInst *LI = cast<LoadInst>(UI);
1678      LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
1679                                   LI->getOrdering(), LI->getSynchScope(), LI);
1680      Value *NSI;
1681      if (IsOneZero)
1682        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
1683      else
1684        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
1685      NSI->takeName(LI);
1686      LI->replaceAllUsesWith(NSI);
1687    }
1688    UI->eraseFromParent();
1689  }
1690
1691  // Retain the name of the old global variable. People who are debugging their
1692  // programs may expect these variables to be named the same.
1693  NewGV->takeName(GV);
1694  GV->eraseFromParent();
1695  return true;
1696}
1697
1698
1699/// ProcessGlobal - Analyze the specified global variable and optimize it if
1700/// possible.  If we make a change, return true.
1701bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
1702                              Module::global_iterator &GVI) {
1703  // Do more involved optimizations if the global is internal.
1704  GV->removeDeadConstantUsers();
1705
1706  if (GV->use_empty()) {
1707    DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
1708    GV->eraseFromParent();
1709    ++NumDeleted;
1710    return true;
1711  }
1712
1713  if (!GV->hasLocalLinkage())
1714    return false;
1715
1716  GlobalStatus GS;
1717
1718  if (GlobalStatus::analyzeGlobal(GV, GS))
1719    return false;
1720
1721  if (!GS.IsCompared && !GV->hasUnnamedAddr()) {
1722    GV->setUnnamedAddr(true);
    ++NumUnnamed;
1724  }
1725
1726  if (GV->isConstant() || !GV->hasInitializer())
1727    return false;
1728
1729  return ProcessInternalGlobal(GV, GVI, GS);
1730}
1731
1732/// ProcessInternalGlobal - Analyze the specified global variable and optimize
1733/// it if possible.  If we make a change, return true.
1734bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
1735                                      Module::global_iterator &GVI,
1736                                      const GlobalStatus &GS) {
1737  // If this is a first class global and has only one accessing function
1738  // and this function is main (which we know is not recursive), we replace
1739  // the global with a local alloca in this function.
1740  //
1741  // NOTE: It doesn't make sense to promote non-single-value types since we
  // are just replacing static memory with stack memory.
  //
  // If the global is in a different address space, don't bring it to the
  // stack.
1745  if (!GS.HasMultipleAccessingFunctions &&
1746      GS.AccessingFunction && !GS.HasNonInstructionUser &&
1747      GV->getType()->getElementType()->isSingleValueType() &&
1748      GS.AccessingFunction->getName() == "main" &&
1749      GS.AccessingFunction->hasExternalLinkage() &&
1750      GV->getType()->getAddressSpace() == 0) {
1751    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
1752    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
1753                                                   ->getEntryBlock().begin());
1754    Type *ElemTy = GV->getType()->getElementType();
1755    // FIXME: Pass Global's alignment when globals have alignment
1756    AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
1757                                        GV->getName(), &FirstI);
1758    if (!isa<UndefValue>(GV->getInitializer()))
1759      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
1760
1761    GV->replaceAllUsesWith(Alloca);
1762    GV->eraseFromParent();
1763    ++NumLocalized;
1764    return true;
1765  }
1766
1767  // If the global is never loaded (but may be stored to), it is dead.
1768  // Delete it now.
1769  if (!GS.IsLoaded) {
1770    DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
1771
1772    bool Changed;
1773    if (isLeakCheckerRoot(GV)) {
1774      // Delete any constant stores to the global.
1775      Changed = CleanupPointerRootUsers(GV, TLI);
1776    } else {
1777      // Delete any stores we can find to the global.  We may not be able to
1778      // make it completely dead though.
1779      Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
1780    }
1781
1782    // If the global is dead now, delete it.
1783    if (GV->use_empty()) {
1784      GV->eraseFromParent();
1785      ++NumDeleted;
1786      Changed = true;
1787    }
1788    return Changed;
1789
1790  } else if (GS.StoredType <= GlobalStatus::InitializerStored) {
1791    DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
1792    GV->setConstant(true);
1793
1794    // Clean up any obviously simplifiable users now.
1795    CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
1796
1797    // If the global is dead now, just nuke it.
1798    if (GV->use_empty()) {
1799      DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
1800            << "all users and delete global!\n");
1801      GV->eraseFromParent();
1802      ++NumDeleted;
1803    }
1804
1805    ++NumMarked;
1806    return true;
1807  } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
1808    if (DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>()) {
1809      const DataLayout &DL = DLP->getDataLayout();
1810      if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
1811        GVI = FirstNewGV;  // Don't skip the newly produced globals!
1812        return true;
1813      }
1814    }
1815  } else if (GS.StoredType == GlobalStatus::StoredOnce) {
1816    // If the initial value for the global was an undef value, and if only
1817    // one other value was stored into it, we can just change the
1818    // initializer to be the stored value, then delete all stores to the
1819    // global.  This allows us to mark it constant.
1820    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
1821      if (isa<UndefValue>(GV->getInitializer())) {
1822        // Change the initial value here.
1823        GV->setInitializer(SOVConstant);
1824
1825        // Clean up any obviously simplifiable users now.
1826        CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
1827
1828        if (GV->use_empty()) {
1829          DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
1830                       << "simplify all users and delete global!\n");
1831          GV->eraseFromParent();
1832          ++NumDeleted;
1833        } else {
1834          GVI = GV;
1835        }
1836        ++NumSubstitute;
1837        return true;
1838      }
1839
1840    // Try to optimize globals based on the knowledge that only one value
1841    // (besides its initializer) is ever stored to the global.
1842    if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
1843                                 DL, TLI))
1844      return true;
1845
1846    // Otherwise, if the global was not a boolean, we can shrink it to be a
1847    // boolean.
1848    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
1849      if (GS.Ordering == NotAtomic) {
1850        if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
1851          ++NumShrunkToBool;
1852          return true;
1853        }
1854      }
1855    }
1856  }
1857
1858  return false;
1859}
1860
1861/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
1862/// function, changing them to FastCC.
1863static void ChangeCalleesToFastCall(Function *F) {
1864  for (User *U : F->users()) {
1865    if (isa<BlockAddress>(U))
1866      continue;
1867    CallSite CS(cast<Instruction>(U));
1868    CS.setCallingConv(CallingConv::Fast);
1869  }
1870}
1871
1872static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
1873  for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
1874    unsigned Index = Attrs.getSlotIndex(i);
1875    if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
1876      continue;
1877
1878    // There can be only one.
1879    return Attrs.removeAttribute(C, Index, Attribute::Nest);
1880  }
1881
1882  return Attrs;
1883}
1884
1885static void RemoveNestAttribute(Function *F) {
1886  F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
1887  for (User *U : F->users()) {
1888    if (isa<BlockAddress>(U))
1889      continue;
1890    CallSite CS(cast<Instruction>(U));
1891    CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
1892  }
1893}
1894
1895/// Return true if this is a calling convention that we'd like to change.  The
1896/// idea here is that we don't want to mess with the convention if the user
1897/// explicitly requested something with performance implications like coldcc,
1898/// GHC, or anyregcc.
1899static bool isProfitableToMakeFastCC(Function *F) {
1900  CallingConv::ID CC = F->getCallingConv();
1901  // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
1902  return CC == CallingConv::C || CC == CallingConv::X86_ThisCall;
1903}
1904
1905bool GlobalOpt::OptimizeFunctions(Module &M) {
1906  bool Changed = false;
1907  // Optimize functions.
1908  for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
1909    Function *F = FI++;
1910    // Functions without names cannot be referenced outside this module.
1911    if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
1912      F->setLinkage(GlobalValue::InternalLinkage);
1913    F->removeDeadConstantUsers();
1914    if (F->isDefTriviallyDead()) {
1915      F->eraseFromParent();
1916      Changed = true;
1917      ++NumFnDeleted;
1918    } else if (F->hasLocalLinkage()) {
1919      if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
1920          !F->hasAddressTaken()) {
1921        // If this function has a calling convention worth changing, is not a
1922        // varargs function, and is only called directly, promote it to use the
1923        // Fast calling convention.
1924        F->setCallingConv(CallingConv::Fast);
1925        ChangeCalleesToFastCall(F);
1926        ++NumFastCallFns;
1927        Changed = true;
1928      }
1929
1930      if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
1931          !F->hasAddressTaken()) {
1932        // The function is not used by a trampoline intrinsic, so it is safe
1933        // to remove the 'nest' attribute.
1934        RemoveNestAttribute(F);
1935        ++NumNestRemoved;
1936        Changed = true;
1937      }
1938    }
1939  }
1940  return Changed;
1941}
1942
1943bool GlobalOpt::OptimizeGlobalVars(Module &M) {
1944  bool Changed = false;
1945
1946  SmallSet<const Comdat *, 8> NotDiscardableComdats;
1947  for (const GlobalVariable &GV : M.globals())
1948    if (const Comdat *C = GV.getComdat())
1949      if (!GV.isDiscardableIfUnused())
1950        NotDiscardableComdats.insert(C);
1951
1952  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
1953       GVI != E; ) {
1954    GlobalVariable *GV = GVI++;
1955    // Global variables without names cannot be referenced outside this module.
1956    if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
1957      GV->setLinkage(GlobalValue::InternalLinkage);
1958    // Simplify the initializer.
1959    if (GV->hasInitializer())
1960      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
1961        Constant *New = ConstantFoldConstantExpression(CE, DL, TLI);
1962        if (New && New != CE)
1963          GV->setInitializer(New);
1964      }
1965
1966    if (GV->isDiscardableIfUnused()) {
1967      if (const Comdat *C = GV->getComdat())
1968        if (NotDiscardableComdats.count(C))
1969          continue;
1970      Changed |= ProcessGlobal(GV, GVI);
1971    }
1972  }
1973  return Changed;
1974}
1975
1976static inline bool
1977isSimpleEnoughValueToCommit(Constant *C,
1978                            SmallPtrSet<Constant*, 8> &SimpleConstants,
1979                            const DataLayout *DL);
1980
1981
1982/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
1983/// handled by the code generator.  We don't want to generate something like:
1984///   void *X = &X/42;
1985/// because the code generator doesn't have a relocation that can handle that.
1986///
1987/// This function should be called if C was not found (but just got inserted)
1988/// in SimpleConstants to avoid having to rescan the same constants all the
1989/// time.
1990static bool isSimpleEnoughValueToCommitHelper(Constant *C,
1991                                   SmallPtrSet<Constant*, 8> &SimpleConstants,
1992                                   const DataLayout *DL) {
1993  // Simple global addresses are supported, do not allow dllimport or
1994  // thread-local globals.
1995  if (auto *GV = dyn_cast<GlobalValue>(C))
1996    return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal();
1997
1998  // Simple integer, undef, constant aggregate zero, etc are all supported.
1999  if (C->getNumOperands() == 0 || isa<BlockAddress>(C))
2000    return true;
2001
2002  // Aggregate values are safe if all their elements are.
2003  if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
2004      isa<ConstantVector>(C)) {
2005    for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
2006      Constant *Op = cast<Constant>(C->getOperand(i));
2007      if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, DL))
2008        return false;
2009    }
2010    return true;
2011  }
2012
2013  // We don't know exactly what relocations are allowed in constant expressions,
2014  // so we allow &global+constantoffset, which is safe and uniformly supported
2015  // across targets.
2016  ConstantExpr *CE = cast<ConstantExpr>(C);
2017  switch (CE->getOpcode()) {
2018  case Instruction::BitCast:
2019    // Bitcast is fine if the casted value is fine.
2020    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
2021
2022  case Instruction::IntToPtr:
2023  case Instruction::PtrToInt:
2024    // int <=> ptr is fine if the int type is the same size as the
2025    // pointer type.
2026    if (!DL || DL->getTypeSizeInBits(CE->getType()) !=
2027               DL->getTypeSizeInBits(CE->getOperand(0)->getType()))
2028      return false;
2029    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
2030
2031  // GEP is fine if it is simple + constant offset.
2032  case Instruction::GetElementPtr:
2033    for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
2034      if (!isa<ConstantInt>(CE->getOperand(i)))
2035        return false;
2036    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
2037
2038  case Instruction::Add:
2039    // We allow simple+cst.
2040    if (!isa<ConstantInt>(CE->getOperand(1)))
2041      return false;
2042    return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
2043  }
2044  return false;
2045}
2046
2047static inline bool
2048isSimpleEnoughValueToCommit(Constant *C,
2049                            SmallPtrSet<Constant*, 8> &SimpleConstants,
2050                            const DataLayout *DL) {
2051  // If we already checked this constant, we win.
2052  if (!SimpleConstants.insert(C)) return true;
2053  // Check the constant.
2054  return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
2055}
2056
2057
2058/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
2059/// enough for us to understand.  In particular, if it is a cast to anything
2060/// other than from one pointer type to another pointer type, we punt.
2061/// We basically just support direct accesses to globals and GEP's of
2062/// globals.  This should be kept up to date with CommitValueTo.
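///
/// Illustrative examples of pointers we accept (assuming the globals have
/// unique initializers):
///   @g                                                    ; the global itself
///   getelementptr inbounds ([4 x i32]* @a, i32 0, i32 1)  ; in-bounds GEP
///   bitcast (i32* @g to float*)                           ; ptr-to-ptr bitcast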
2063static bool isSimpleEnoughPointerToCommit(Constant *C) {
2064  // Conservatively, avoid aggregate types. This is because we don't
2065  // want to worry about them partially overlapping other stores.
2066  if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
2067    return false;
2068
2069  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
2070    // Do not allow weak/*_odr/linkonce linkage or external globals.
2071    return GV->hasUniqueInitializer();
2072
2073  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
2074    // Handle a constantexpr gep.
2075    if (CE->getOpcode() == Instruction::GetElementPtr &&
2076        isa<GlobalVariable>(CE->getOperand(0)) &&
2077        cast<GEPOperator>(CE)->isInBounds()) {
2078      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2079      // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
2080      // external globals.
2081      if (!GV->hasUniqueInitializer())
2082        return false;
2083
2084      // The first index must be zero.
2085      ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin()));
2086      if (!CI || !CI->isZero()) return false;
2087
2088      // The remaining indices must be compile-time known integers within the
2089      // notional bounds of the corresponding static array types.
2090      if (!CE->isGEPWithNoNotionalOverIndexing())
2091        return false;
2092
2093      return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
2094
2095    // A constantexpr bitcast from a pointer to another pointer is a no-op,
2096    // and we know how to evaluate it by moving the bitcast from the pointer
2097    // operand to the value operand.
2098    } else if (CE->getOpcode() == Instruction::BitCast &&
2099               isa<GlobalVariable>(CE->getOperand(0))) {
2100      // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
2101      // external globals.
2102      return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
2103    }
2104  }
2105
2106  return false;
2107}
2108
2109/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
2110/// initializer.  This returns 'Init' modified to reflect 'Val' stored into it.
2111/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
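///
/// For example (illustrative), storing i32 7 through
/// getelementptr(@g, 0, 1, 0), where
///   @g = global { i32, [2 x i32] } { i32 0, [2 x i32] [i32 1, i32 2] }
/// rebuilds the initializer as { i32 0, [2 x i32] [i32 7, i32 2] }.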
2112static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
2113                                   ConstantExpr *Addr, unsigned OpNo) {
2114  // Base case of the recursion.
2115  if (OpNo == Addr->getNumOperands()) {
2116    assert(Val->getType() == Init->getType() && "Type mismatch!");
2117    return Val;
2118  }
2119
2120  SmallVector<Constant*, 32> Elts;
2121  if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
2122    // Break up the constant into its elements.
2123    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
2124      Elts.push_back(Init->getAggregateElement(i));
2125
2126    // Replace the element that we are supposed to.
2127    ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
2128    unsigned Idx = CU->getZExtValue();
2129    assert(Idx < STy->getNumElements() && "Struct index out of range!");
2130    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
2131
2132    // Return the modified struct.
2133    return ConstantStruct::get(STy, Elts);
2134  }
2135
2136  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
2137  SequentialType *InitTy = cast<SequentialType>(Init->getType());
2138
2139  uint64_t NumElts;
2140  if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
2141    NumElts = ATy->getNumElements();
2142  else
2143    NumElts = InitTy->getVectorNumElements();
2144
2145  // Break up the array into elements.
2146  for (uint64_t i = 0, e = NumElts; i != e; ++i)
2147    Elts.push_back(Init->getAggregateElement(i));
2148
2149  assert(CI->getZExtValue() < NumElts);
2150  Elts[CI->getZExtValue()] =
2151    EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
2152
2153  if (Init->getType()->isArrayTy())
2154    return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
2155  return ConstantVector::get(Elts);
2156}
2157
2158/// CommitValueTo - We have decided that Addr (which satisfies the predicate
2159/// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
2160static void CommitValueTo(Constant *Val, Constant *Addr) {
2161  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
2162    assert(GV->hasInitializer());
2163    GV->setInitializer(Val);
2164    return;
2165  }
2166
2167  ConstantExpr *CE = cast<ConstantExpr>(Addr);
2168  GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
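  // Operand 0 of the GEP is the global itself and operand 1 is the leading
  // zero index, so stepping into the initializer starts at operand 2.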
2169  GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
2170}
2171
2172namespace {
2173
2174/// Evaluator - This class evaluates LLVM IR, producing the Constant
2175/// representing each SSA instruction.  Changes to global variables are stored
2176/// in a mapping that can be iterated over after the evaluation is complete.
2177/// Once an evaluation call fails, the evaluation object should not be reused.
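///
/// For example (illustrative), a static constructor whose body is just
///   store i32 42, i32* @x
///   ret void
/// evaluates successfully, recording {@x -> 42} in the mutated-memory map so
/// the caller can commit 42 as @x's initializer.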
2178class Evaluator {
2179public:
2180  Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI)
2181    : DL(DL), TLI(TLI) {
2182    ValueStack.emplace_back();
2183  }
2184
2185  ~Evaluator() {
2186    for (auto &Tmp : AllocaTmps)
2187      // If there are still users of the alloca, the program is doing something
2188      // silly, e.g. storing the address of the alloca somewhere and using it
2189      // later.  Since this is undefined, we'll just make it be null.
2190      if (!Tmp->use_empty())
2191        Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
2192  }
2193
2194  /// EvaluateFunction - Evaluate a call to function F, returning true if
  /// successful, false if we can't evaluate it.  ActualArgs contains the
  /// actual arguments for the function.
2197  bool EvaluateFunction(Function *F, Constant *&RetVal,
2198                        const SmallVectorImpl<Constant*> &ActualArgs);
2199
  /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
  /// successful, false if we can't evaluate it.  NextBB returns the next BB
  /// that control flows into, or null upon return.
2203  bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
2204
2205  Constant *getVal(Value *V) {
2206    if (Constant *CV = dyn_cast<Constant>(V)) return CV;
2207    Constant *R = ValueStack.back().lookup(V);
2208    assert(R && "Reference to an uncomputed value!");
2209    return R;
2210  }
2211
2212  void setVal(Value *V, Constant *C) {
2213    ValueStack.back()[V] = C;
2214  }
2215
2216  const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
2217    return MutatedMemory;
2218  }
2219
2220  const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
2221    return Invariants;
2222  }
2223
2224private:
2225  Constant *ComputeLoadResult(Constant *P);
2226
2227  /// ValueStack - As we compute SSA register values, we store their contents
2228  /// here. The back of the deque contains the current function and the stack
2229  /// contains the values in the calling frames.
2230  std::deque<DenseMap<Value*, Constant*>> ValueStack;
2231
2232  /// CallStack - This is used to detect recursion.  In pathological situations
2233  /// we could hit exponential behavior, but at least there is nothing
2234  /// unbounded.
2235  SmallVector<Function*, 4> CallStack;
2236
2237  /// MutatedMemory - For each store we execute, we update this map.  Loads
2238  /// check this to get the most up-to-date value.  If evaluation is successful,
2239  /// this state is committed to the process.
2240  DenseMap<Constant*, Constant*> MutatedMemory;
2241
2242  /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
2243  /// to represent its body.  This vector is needed so we can delete the
2244  /// temporary globals when we are done.
2245  SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps;
2246
2247  /// Invariants - These global variables have been marked invariant by the
2248  /// static constructor.
2249  SmallPtrSet<GlobalVariable*, 8> Invariants;
2250
2251  /// SimpleConstants - These are constants we have checked and know to be
2252  /// simple enough to live in a static initializer of a global.
2253  SmallPtrSet<Constant*, 8> SimpleConstants;
2254
2255  const DataLayout *DL;
2256  const TargetLibraryInfo *TLI;
2257};
2258
2259}  // anonymous namespace
2260
2261/// ComputeLoadResult - Return the value that would be computed by a load from
2262/// P after the stores reflected by 'memory' have been performed.  If we can't
2263/// decide, return null.
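///
/// For example (illustrative), after "store i32 1, i32* @x" has been
/// evaluated, a load of @x returns the pending value 1 from MutatedMemory
/// rather than @x's original initializer.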
2264Constant *Evaluator::ComputeLoadResult(Constant *P) {
2265  // If this memory location has been recently stored, use the stored value: it
2266  // is the most up-to-date.
2267  DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
2268  if (I != MutatedMemory.end()) return I->second;
2269
2270  // Access it.
2271  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
2272    if (GV->hasDefinitiveInitializer())
2273      return GV->getInitializer();
2274    return nullptr;
2275  }
2276
2277  // Handle a constantexpr getelementptr.
2278  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
2279    if (CE->getOpcode() == Instruction::GetElementPtr &&
2280        isa<GlobalVariable>(CE->getOperand(0))) {
2281      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
2282      if (GV->hasDefinitiveInitializer())
2283        return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
2284    }
2285
2286  return nullptr;  // don't know how to evaluate.
2287}
2288
/// EvaluateBlock - Evaluate all instructions in block BB, returning true if
/// successful, false if we can't evaluate it.  NextBB returns the next BB that
/// control flows into, or null upon return.
2292bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
2293                              BasicBlock *&NextBB) {
2294  // This is the main evaluation loop.
2295  while (1) {
2296    Constant *InstResult = nullptr;
2297
2298    DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
2299
2300    if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
2301      if (!SI->isSimple()) {
2302        DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
2303        return false;  // no volatile/atomic accesses.
2304      }
2305      Constant *Ptr = getVal(SI->getOperand(1));
2306      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
2307        DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
2308        Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
2309        DEBUG(dbgs() << "; To: " << *Ptr << "\n");
2310      }
2311      if (!isSimpleEnoughPointerToCommit(Ptr)) {
2312        // If this is too complex for us to commit, reject it.
2313        DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
2314        return false;
2315      }
2316
2317      Constant *Val = getVal(SI->getOperand(0));
2318
2319      // If this might be too difficult for the backend to handle (e.g. the addr
2320      // of one global variable divided by another) then we can't commit it.
2321      if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
2322        DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
2323              << "\n");
2324        return false;
2325      }
2326
2327      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
2328        if (CE->getOpcode() == Instruction::BitCast) {
2329          DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
2330          // If we're evaluating a store through a bitcast, then we need
2331          // to pull the bitcast off the pointer type and push it onto the
2332          // stored value.
2333          Ptr = CE->getOperand(0);
2334
2335          Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
2336
2337          // In order to push the bitcast onto the stored value, a bitcast
2338          // from NewTy to Val's type must be legal.  If it's not, we can try
2339          // introspecting NewTy to find a legal conversion.
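          // For example (illustrative): storing an i32 through a pointer
          // bitcast from %S* (where %S = type { i32, i8 }) is handled by
          // retargeting the store at the struct's first member, i.e.
          // getelementptr (%S* @s, i32 0, i32 0).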
2340          while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
2341            // If NewTy is a struct, we can convert the pointer to the struct
2342            // into a pointer to its first member.
2343            // FIXME: This could be extended to support arrays as well.
2344            if (StructType *STy = dyn_cast<StructType>(NewTy)) {
2345              NewTy = STy->getTypeAtIndex(0U);
2346
2347              IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
2348              Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
2349              Constant * const IdxList[] = {IdxZero, IdxZero};
2350
2351              Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
2352              if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
2353                Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
2354
2355            // If we can't improve the situation by introspecting NewTy,
2356            // we have to give up.
2357            } else {
2358              DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
2359                    "evaluate.\n");
2360              return false;
2361            }
2362          }
2363
2364          // If we found compatible types, go ahead and push the bitcast
2365          // onto the stored value.
2366          Val = ConstantExpr::getBitCast(Val, NewTy);
2367
2368          DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
2369        }
2370      }
2371
2372      MutatedMemory[Ptr] = Val;
2373    } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
2374      InstResult = ConstantExpr::get(BO->getOpcode(),
2375                                     getVal(BO->getOperand(0)),
2376                                     getVal(BO->getOperand(1)));
2377      DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
2378            << "\n");
2379    } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
2380      InstResult = ConstantExpr::getCompare(CI->getPredicate(),
2381                                            getVal(CI->getOperand(0)),
2382                                            getVal(CI->getOperand(1)));
2383      DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
2384            << "\n");
2385    } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
2386      InstResult = ConstantExpr::getCast(CI->getOpcode(),
2387                                         getVal(CI->getOperand(0)),
2388                                         CI->getType());
2389      DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
2390            << "\n");
2391    } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
2392      InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
2393                                           getVal(SI->getOperand(1)),
2394                                           getVal(SI->getOperand(2)));
2395      DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
2396            << "\n");
2397    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
2398      Constant *P = getVal(GEP->getOperand(0));
2399      SmallVector<Constant*, 8> GEPOps;
2400      for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
2401           i != e; ++i)
2402        GEPOps.push_back(getVal(*i));
2403      InstResult =
2404        ConstantExpr::getGetElementPtr(P, GEPOps,
2405                                       cast<GEPOperator>(GEP)->isInBounds());
2406      DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
2407            << "\n");
2408    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
2409
2410      if (!LI->isSimple()) {
2411        DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
2412        return false;  // no volatile/atomic accesses.
2413      }
2414
2415      Constant *Ptr = getVal(LI->getOperand(0));
2416      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
2417        Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
2418        DEBUG(dbgs() << "Found a constant pointer expression, constant "
2419              "folding: " << *Ptr << "\n");
2420      }
2421      InstResult = ComputeLoadResult(Ptr);
2422      if (!InstResult) {
2423        DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
2424              "\n");
2425        return false; // Could not evaluate load.
2426      }
2427
2428      DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
2429    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
2430      if (AI->isArrayAllocation()) {
2431        DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
2432        return false;  // Cannot handle array allocs.
2433      }
2434      Type *Ty = AI->getType()->getElementType();
2435      AllocaTmps.push_back(
2436          make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
2437                                      UndefValue::get(Ty), AI->getName()));
2438      InstResult = AllocaTmps.back().get();
2439      DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
2440    } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
2441      CallSite CS(CurInst);
2442
2443      // Debug info can safely be ignored here.
2444      if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
2445        DEBUG(dbgs() << "Ignoring debug info.\n");
2446        ++CurInst;
2447        continue;
2448      }
2449
2450      // Cannot handle inline asm.
2451      if (isa<InlineAsm>(CS.getCalledValue())) {
2452        DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
2453        return false;
2454      }
2455
2456      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
2457        if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
2458          if (MSI->isVolatile()) {
2459            DEBUG(dbgs() << "Can not optimize a volatile memset " <<
2460                  "intrinsic.\n");
2461            return false;
2462          }
2463          Constant *Ptr = getVal(MSI->getDest());
2464          Constant *Val = getVal(MSI->getValue());
2465          Constant *DestVal = ComputeLoadResult(getVal(Ptr));
2466          if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
2467            // This memset is a no-op.
2468            DEBUG(dbgs() << "Ignoring no-op memset.\n");
2469            ++CurInst;
2470            continue;
2471          }
2472        }
2473
2474        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
2475            II->getIntrinsicID() == Intrinsic::lifetime_end) {
2476          DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
2477          ++CurInst;
2478          continue;
2479        }
2480
2481        if (II->getIntrinsicID() == Intrinsic::invariant_start) {
          // We don't insert an entry into the value map, as invariant_start
          // doesn't have a meaningful return value.
2484          if (!II->use_empty()) {
2485            DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n");
2486            return false;
2487          }
2488          ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
2489          Value *PtrArg = getVal(II->getArgOperand(1));
2490          Value *Ptr = PtrArg->stripPointerCasts();
2491          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
2492            Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
2493            if (DL && !Size->isAllOnesValue() &&
2494                Size->getValue().getLimitedValue() >=
2495                DL->getTypeStoreSize(ElemTy)) {
2496              Invariants.insert(GV);
2497              DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
2498                    << "\n");
2499            } else {
2500              DEBUG(dbgs() << "Found a global var, but can not treat it as an "
2501                    "invariant.\n");
2502            }
2503          }
2504          // Continue even if we do nothing.
2505          ++CurInst;
2506          continue;
2507        }
2508
2509        DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
2510        return false;
2511      }
2512
2513      // Resolve function pointers.
2514      Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
2515      if (!Callee || Callee->mayBeOverridden()) {
2516        DEBUG(dbgs() << "Can not resolve function pointer.\n");
2517        return false;  // Cannot resolve.
2518      }
2519
2520      SmallVector<Constant*, 8> Formals;
2521      for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
2522        Formals.push_back(getVal(*i));
2523
2524      if (Callee->isDeclaration()) {
2525        // If this is a function we can constant fold, do it.
2526        if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
2527          InstResult = C;
2528          DEBUG(dbgs() << "Constant folded function call. Result: " <<
2529                *InstResult << "\n");
2530        } else {
2531          DEBUG(dbgs() << "Can not constant fold function call.\n");
2532          return false;
2533        }
2534      } else {
2535        if (Callee->getFunctionType()->isVarArg()) {
2536          DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
2537          return false;
2538        }
2539
2540        Constant *RetVal = nullptr;
2541        // Execute the call, if successful, use the return value.
2542        ValueStack.emplace_back();
2543        if (!EvaluateFunction(Callee, RetVal, Formals)) {
2544          DEBUG(dbgs() << "Failed to evaluate function.\n");
2545          return false;
2546        }
2547        ValueStack.pop_back();
2548        InstResult = RetVal;
2549
2550        if (InstResult) {
2551          DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
2552                InstResult << "\n\n");
2553        } else {
2554          DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
2555        }
2556      }
2557    } else if (isa<TerminatorInst>(CurInst)) {
2558      DEBUG(dbgs() << "Found a terminator instruction.\n");
2559
2560      if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
2561        if (BI->isUnconditional()) {
2562          NextBB = BI->getSuccessor(0);
2563        } else {
2564          ConstantInt *Cond =
2565            dyn_cast<ConstantInt>(getVal(BI->getCondition()));
2566          if (!Cond) return false;  // Cannot determine.
2567
2568          NextBB = BI->getSuccessor(!Cond->getZExtValue());
2569        }
2570      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
2571        ConstantInt *Val =
2572          dyn_cast<ConstantInt>(getVal(SI->getCondition()));
2573        if (!Val) return false;  // Cannot determine.
2574        NextBB = SI->findCaseValue(Val).getCaseSuccessor();
2575      } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
2576        Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
2577        if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
2578          NextBB = BA->getBasicBlock();
2579        else
2580          return false;  // Cannot determine.
2581      } else if (isa<ReturnInst>(CurInst)) {
2582        NextBB = nullptr;
2583      } else {
2584        // invoke, unwind, resume, unreachable.
2585        DEBUG(dbgs() << "Can not handle terminator.");
2586        return false;  // Cannot handle this terminator.
2587      }

      // We succeeded at evaluating this block!
      DEBUG(dbgs() << "Successfully evaluated block.\n");
      return true;
    } else {
      // Did not know how to evaluate this!
      DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
            "\n");
      return false;
    }

    if (!CurInst->use_empty()) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
        InstResult = ConstantFoldConstantExpression(CE, DL, TLI);

      setVal(CurInst, InstResult);
    }

    // If we just processed an invoke, we finished evaluating the block.
    if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
      NextBB = II->getNormalDest();
      DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
      return true;
    }

    // Advance program counter.
    ++CurInst;
  }
}

/// EvaluateFunction - Evaluate a call to function F, returning true if
/// successful, false if we can't evaluate it.  ActualArgs contains the values
/// to bind to F's formal arguments.
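///
/// As a sketch (illustrative IR; @init and @g are hypothetical), evaluating a
/// call to:
///   define internal i32 @init() {
///     store i32 7, i32* @g
///     ret i32 7
///   }
/// yields the constant i32 7, with the store to @g recorded in the mutated
/// memory map rather than emitted to the module.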
bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
                                 const SmallVectorImpl<Constant*> &ActualArgs) {
  // Check to see if this function is already executing (recursion).  If so,
  // bail out.  TODO: we might want to accept limited recursion.
  if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
    return false;

  CallStack.push_back(F);

  // Initialize arguments to the incoming values specified.
  unsigned ArgNo = 0;
  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
       ++AI, ++ArgNo)
    setVal(AI, ActualArgs[ArgNo]);

  // ExecutedBlocks - We only handle non-looping, non-recursive code.  As such,
  // we can only evaluate any one basic block at most once.  This set keeps
  // track of what we have executed so we can detect loops.
  SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;

  // CurBB - The current basic block we're evaluating.
  BasicBlock *CurBB = F->begin();

  BasicBlock::iterator CurInst = CurBB->begin();

  while (true) {
    BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
    DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");

    if (!EvaluateBlock(CurInst, NextBB))
      return false;

    if (!NextBB) {
      // Successfully running until there's no next block means that we found
      // the return.  Fill in the return value and pop the call stack.
      ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
      if (RI->getNumOperands())
        RetVal = getVal(RI->getOperand(0));
      CallStack.pop_back();
      return true;
    }

    // Okay, we succeeded in evaluating this control flow.  See if we have
    // executed the new block before.  If so, we have a looping function,
    // which we cannot evaluate in reasonable time.
    if (!ExecutedBlocks.insert(NextBB))
      return false;  // looped!

    // Okay, we have never been in this block before.  Check to see if there
    // are any PHI nodes.  If so, evaluate them with information about where
    // we came from.
    PHINode *PN = nullptr;
    for (CurInst = NextBB->begin();
         (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
      setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));

    // Advance to the next block.
    CurBB = NextBB;
  }
}

/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can.  Return true if we can, false otherwise.
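///
/// For example (illustrative IR; @ctor and @g are hypothetical), a ctor like:
///   define internal void @ctor() {
///     store i32 42, i32* @g
///     ret void
///   }
/// evaluates fully; committing the result rewrites @g's initializer to i32 42,
/// after which the ctor can be dropped from llvm.global_ctors.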
static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
                                      const TargetLibraryInfo *TLI) {
  // Call the function.
  Evaluator Eval(DL, TLI);
  Constant *RetValDummy;
  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
                                           SmallVector<Constant*, 0>());

  if (EvalSuccess) {
    ++NumCtorsEvaluated;

    // We succeeded at evaluation: commit the result.
    DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
          << F->getName() << "' to " << Eval.getMutatedMemory().size()
          << " stores.\n");
    for (DenseMap<Constant*, Constant*>::const_iterator I =
           Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
         I != E; ++I)
      CommitValueTo(I->second, I->first);
    for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
           Eval.getInvariants().begin(), E = Eval.getInvariants().end();
         I != E; ++I)
      (*I)->setConstant(true);
  }

  return EvalSuccess;
}

static int compareNames(Constant *const *A, Constant *const *B) {
  return (*A)->getName().compare((*B)->getName());
}

static void setUsedInitializer(GlobalVariable &V,
                               const SmallPtrSet<GlobalValue *, 8> &Init) {
  if (Init.empty()) {
    V.eraseFromParent();
    return;
  }

  // Element type of the used array: i8* in the default address space.
  PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);

  SmallVector<llvm::Constant *, 8> UsedArray;
  for (SmallPtrSet<GlobalValue *, 8>::const_iterator I = Init.begin(),
         E = Init.end(); I != E; ++I) {
    Constant *Cast
      = ConstantExpr::getPointerBitCastOrAddrSpaceCast(*I, Int8PtrTy);
    UsedArray.push_back(Cast);
  }
  // Sort to get deterministic order.
  array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
  ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());

  Module *M = V.getParent();
  V.removeFromParent();
  GlobalVariable *NV =
      new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage,
                         llvm::ConstantArray::get(ATy, UsedArray), "");
  NV->takeName(&V);
  NV->setSection("llvm.metadata");
  delete &V;
}

namespace {
/// \brief An easy-to-access representation of llvm.used and llvm.compiler.used.
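///
/// For reference, llvm.used typically looks like this (illustrative IR):
///   @llvm.used = appending global [1 x i8*]
///                [i8* bitcast (void ()* @f to i8*)], section "llvm.metadata"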
class LLVMUsed {
  SmallPtrSet<GlobalValue *, 8> Used;
  SmallPtrSet<GlobalValue *, 8> CompilerUsed;
  GlobalVariable *UsedV;
  GlobalVariable *CompilerUsedV;

public:
  LLVMUsed(Module &M) {
    UsedV = collectUsedGlobalVariables(M, Used, false);
    CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
  }
  typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator;
  iterator usedBegin() { return Used.begin(); }
  iterator usedEnd() { return Used.end(); }
  iterator compilerUsedBegin() { return CompilerUsed.begin(); }
  iterator compilerUsedEnd() { return CompilerUsed.end(); }
  bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
  bool compilerUsedCount(GlobalValue *GV) const {
    return CompilerUsed.count(GV);
  }
  bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
  bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
  bool usedInsert(GlobalValue *GV) { return Used.insert(GV); }
  bool compilerUsedInsert(GlobalValue *GV) { return CompilerUsed.insert(GV); }

  void syncVariablesAndSets() {
    if (UsedV)
      setUsedInitializer(*UsedV, Used);
    if (CompilerUsedV)
      setUsedInitializer(*CompilerUsedV, CompilerUsed);
  }
};
}

static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
  if (GA.use_empty()) // No use at all.
    return false;

  assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
         "We should have removed the duplicated "
         "element from llvm.compiler.used");
  if (!GA.hasOneUse())
    // Strictly more than one use. The alias appears at most once in the
    // llvm.used and llvm.compiler.used arrays combined, so at least one use
    // is not in llvm.used or llvm.compiler.used.
    return true;

  // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
  return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
}

static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
                                               const LLVMUsed &U) {
  // "More than one" means at least two uses; if one of the uses is the
  // llvm.used or llvm.compiler.used array itself, require one more.
  unsigned N = 2;
  assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
         "We should have removed the duplicated "
         "element from llvm.compiler.used");
  if (U.usedCount(&V) || U.compilerUsedCount(&V))
    ++N;
  return V.hasNUsesOrMore(N);
}

static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
  if (!GA.hasLocalLinkage())
    return true;

  return U.usedCount(&GA) || U.compilerUsedCount(&GA);
}

static bool hasUsesToReplace(GlobalAlias &GA, LLVMUsed &U, bool &RenameTarget) {
  RenameTarget = false;
  bool Ret = false;
  if (hasUseOtherThanLLVMUsed(GA, U))
    Ret = true;

  // If the alias is externally visible, we may still be able to simplify it.
  if (!mayHaveOtherReferences(GA, U))
    return Ret;

  // If the aliasee has internal linkage, give it the name and linkage
  // of the alias, and delete the alias.  This turns:
  //   define internal ... @f(...)
  //   @a = alias ... @f
  // into:
  //   define ... @a(...)
  Constant *Aliasee = GA.getAliasee();
  GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
  if (!Target->hasLocalLinkage())
    return Ret;

  // Do not perform the transform if multiple aliases potentially target the
  // aliasee. This check also ensures that it is safe to replace the section
  // and other attributes of the aliasee with those of the alias.
  if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
    return Ret;

  RenameTarget = true;
  return true;
}

bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
  bool Changed = false;
  LLVMUsed Used(M);

  for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.usedBegin(),
                                               E = Used.usedEnd();
       I != E; ++I)
    Used.compilerUsedErase(*I);

  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
       I != E;) {
    Module::alias_iterator J = I++;
    // Aliases without names cannot be referenced outside this module.
    if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
      J->setLinkage(GlobalValue::InternalLinkage);
    // If the aliasee may change at link time, nothing can be done - bail out.
    if (J->mayBeOverridden())
      continue;

    Constant *Aliasee = J->getAliasee();
    GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
    // We can't trivially replace the alias with the aliasee if the aliasee is
    // non-trivial in some way.
    // TODO: Try to handle non-zero GEPs of local aliasees.
    if (!Target)
      continue;
    Target->removeDeadConstantUsers();

    // Make all users of the alias use the aliasee instead.
    bool RenameTarget;
    if (!hasUsesToReplace(*J, Used, RenameTarget))
      continue;

    J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
    ++NumAliasesResolved;
    Changed = true;

    if (RenameTarget) {
      // Give the aliasee the name, linkage and other attributes of the alias.
      Target->takeName(J);
      Target->setLinkage(J->getLinkage());
      Target->setVisibility(J->getVisibility());
      Target->setDLLStorageClass(J->getDLLStorageClass());

      if (Used.usedErase(J))
        Used.usedInsert(Target);

      if (Used.compilerUsedErase(J))
        Used.compilerUsedInsert(Target);
    } else if (mayHaveOtherReferences(*J, Used))
      continue;

    // Delete the alias.
    M.getAliasList().erase(J);
    ++NumAliasesRemoved;
    Changed = true;
  }

  Used.syncVariablesAndSets();

  return Changed;
}

static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc::cxa_atexit))
    return nullptr;

  Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));

  if (!Fn)
    return nullptr;

  FunctionType *FTy = Fn->getFunctionType();

  // Checking that the function has the right return type, the right number of
  // parameters and that they all have pointer types should be enough.
  if (!FTy->getReturnType()->isIntegerTy() ||
      FTy->getNumParams() != 3 ||
      !FTy->getParamType(0)->isPointerTy() ||
      !FTy->getParamType(1)->isPointerTy() ||
      !FTy->getParamType(2)->isPointerTy())
    return nullptr;

  return Fn;
}

/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
/// destructor and can therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code, so we only look for a function with a single basic block, where
/// the only allowed instructions are 'ret', calls to empty C++ dtors, and
/// other side-effect-free instructions.
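///
/// For example (illustrative IR; the mangled name is hypothetical), this
/// destructor is considered empty:
///   define linkonce_odr void @_ZN3FooD2Ev(%struct.Foo* %this) {
///   entry:
///     ret void
///   }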
static bool cxxDtorIsEmpty(const Function &Fn,
                           SmallPtrSet<const Function *, 8> &CalledFunctions) {
  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
  // nounwind, but that doesn't seem worth doing.
  if (Fn.isDeclaration())
    return false;

  // Bail if the function has more than one basic block.
  if (++Fn.begin() != Fn.end())
    return false;

  const BasicBlock &EntryBlock = Fn.getEntryBlock();
  for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
       I != E; ++I) {
    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
      // Ignore debug intrinsics.
      if (isa<DbgInfoIntrinsic>(CI))
        continue;

      const Function *CalledFn = CI->getCalledFunction();

      if (!CalledFn)
        return false;

      SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);

      // Don't treat recursive functions as empty.
      if (!NewCalledFunctions.insert(CalledFn))
        return false;

      if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
        return false;
    } else if (isa<ReturnInst>(*I))
      return true; // We're done.
    else if (I->mayHaveSideEffects())
      return false; // Destructor with side effects, bail.
  }

  return false;
}

bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
  /// Itanium C++ ABI p3.3.5:
  ///
  ///   After constructing a global (or local static) object, that will require
  ///   destruction on exit, a termination function is registered as follows:
  ///
  ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
  ///
  ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
  ///   call f(p) when DSO d is unloaded, before all such termination calls
  ///   registered before this one. It returns zero if registration is
  ///   successful, nonzero on failure.

  // Look for calls to __cxa_atexit that register a trivial (empty) destructor
  // and remove them.
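  //
  // For example (illustrative IR; names are hypothetical), a registration like
  //   call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.Foo*)*
  //       @_ZN3FooD2Ev to void (i8*)*), i8* bitcast (%struct.Foo* @obj to i8*),
  //       i8* @__dso_handle)
  // can be removed outright once @_ZN3FooD2Ev is known to be empty.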
  bool Changed = false;

  for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
       I != E;) {
    // We're only interested in calls. Theoretically, we could handle invoke
    // instructions as well, but neither llvm-gcc nor clang generates invokes
    // to __cxa_atexit.
    CallInst *CI = dyn_cast<CallInst>(*I++);
    if (!CI)
      continue;

    Function *DtorFn =
      dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
    if (!DtorFn)
      continue;

    SmallPtrSet<const Function *, 8> CalledFunctions;
    if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
      continue;

    // Just remove the call.
    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
    CI->eraseFromParent();

    ++NumCXXDtorsRemoved;

    Changed = true;
  }

  return Changed;
}

bool GlobalOpt::runOnModule(Module &M) {
  bool Changed = false;

  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
  DL = DLP ? &DLP->getDataLayout() : nullptr;
  TLI = &getAnalysis<TargetLibraryInfo>();

  bool LocalChange = true;
  while (LocalChange) {
    LocalChange = false;

    // Delete functions that are trivially dead and convert eligible functions
    // from ccc to fastcc.
    LocalChange |= OptimizeFunctions(M);

    // Optimize global_ctors list.
    LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
      return EvaluateStaticConstructor(F, DL, TLI);
    });

    // Optimize non-address-taken globals.
    LocalChange |= OptimizeGlobalVars(M);

    // Resolve aliases, when possible.
    LocalChange |= OptimizeGlobalAliases(M);

    // Try to remove trivial global destructors if they are not removed
    // already.
    Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
    if (CXAAtExitFn)
      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);

    Changed |= LocalChange;
  }

  // TODO: Move all global ctors functions to the end of the module for code
  // layout.

  return Changed;
}
