1//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file promotes memory references to be register references.  It promotes
11// alloca instructions which only have loads and stores as uses.  An alloca is
12// transformed by using iterated dominator frontiers to place PHI nodes, then
13// traversing the function in depth-first order to rewrite loads and stores as
14// appropriate.
15//
16// The algorithm used here is based on:
17//
18//   Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
19//   In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
20//   Programming Languages
21//   POPL '95. ACM, New York, NY, 62-73.
22//
23// It has been modified to not explicitly use the DJ graph data structure and to
24// directly compute pruned SSA using per-variable liveness information.
25//
26//===----------------------------------------------------------------------===//
27
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <queue>
#include <utility>
using namespace llvm;

#define DEBUG_TYPE "mem2reg"

// Promotion statistics, reported with -stats.
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");
60
61bool llvm::isAllocaPromotable(const AllocaInst *AI) {
62  // FIXME: If the memory unit is of pointer or integer type, we can permit
63  // assignments to subsections of the memory unit.
64  unsigned AS = AI->getType()->getAddressSpace();
65
66  // Only allow direct and non-volatile loads and stores...
67  for (const User *U : AI->users()) {
68    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
69      // Note that atomic loads can be transformed; atomic semantics do
70      // not have any meaning for a local alloca.
71      if (LI->isVolatile())
72        return false;
73    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
74      if (SI->getOperand(0) == AI)
75        return false; // Don't allow a store OF the AI, only INTO the AI.
76      // Note that atomic stores can be transformed; atomic semantics do
77      // not have any meaning for a local alloca.
78      if (SI->isVolatile())
79        return false;
80    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
81      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
82          II->getIntrinsicID() != Intrinsic::lifetime_end)
83        return false;
84    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
85      if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
86        return false;
87      if (!onlyUsedByLifetimeMarkers(BCI))
88        return false;
89    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
90      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
91        return false;
92      if (!GEPI->hasAllZeroIndices())
93        return false;
94      if (!onlyUsedByLifetimeMarkers(GEPI))
95        return false;
96    } else {
97      return false;
98    }
99  }
100
101  return true;
102}
103
104namespace {
105
106struct AllocaInfo {
107  SmallVector<BasicBlock *, 32> DefiningBlocks;
108  SmallVector<BasicBlock *, 32> UsingBlocks;
109
110  StoreInst *OnlyStore;
111  BasicBlock *OnlyBlock;
112  bool OnlyUsedInOneBlock;
113
114  Value *AllocaPointerVal;
115  DbgDeclareInst *DbgDeclare;
116
117  void clear() {
118    DefiningBlocks.clear();
119    UsingBlocks.clear();
120    OnlyStore = nullptr;
121    OnlyBlock = nullptr;
122    OnlyUsedInOneBlock = true;
123    AllocaPointerVal = nullptr;
124    DbgDeclare = nullptr;
125  }
126
127  /// Scan the uses of the specified alloca, filling in the AllocaInfo used
128  /// by the rest of the pass to reason about the uses of this alloca.
129  void AnalyzeAlloca(AllocaInst *AI) {
130    clear();
131
132    // As we scan the uses of the alloca instruction, keep track of stores,
133    // and decide whether all of the loads and stores to the alloca are within
134    // the same basic block.
135    for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
136      Instruction *User = cast<Instruction>(*UI++);
137
138      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
139        // Remember the basic blocks which define new values for the alloca
140        DefiningBlocks.push_back(SI->getParent());
141        AllocaPointerVal = SI->getOperand(0);
142        OnlyStore = SI;
143      } else {
144        LoadInst *LI = cast<LoadInst>(User);
145        // Otherwise it must be a load instruction, keep track of variable
146        // reads.
147        UsingBlocks.push_back(LI->getParent());
148        AllocaPointerVal = LI;
149      }
150
151      if (OnlyUsedInOneBlock) {
152        if (!OnlyBlock)
153          OnlyBlock = User->getParent();
154        else if (OnlyBlock != User->getParent())
155          OnlyUsedInOneBlock = false;
156      }
157    }
158
159    DbgDeclare = FindAllocaDbgDeclare(AI);
160  }
161};
162
163// Data package used by RenamePass()
164class RenamePassData {
165public:
166  typedef std::vector<Value *> ValVector;
167
168  RenamePassData() : BB(nullptr), Pred(nullptr), Values() {}
169  RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
170      : BB(B), Pred(P), Values(V) {}
171  BasicBlock *BB;
172  BasicBlock *Pred;
173  ValVector Values;
174
175  void swap(RenamePassData &RHS) {
176    std::swap(BB, RHS.BB);
177    std::swap(Pred, RHS.Pred);
178    Values.swap(RHS.Values);
179  }
180};
181
182/// \brief This assigns and keeps a per-bb relative ordering of load/store
183/// instructions in the block that directly load or store an alloca.
184///
185/// This functionality is important because it avoids scanning large basic
186/// blocks multiple times when promoting many allocas in the same block.
187class LargeBlockInfo {
188  /// \brief For each instruction that we track, keep the index of the
189  /// instruction.
190  ///
191  /// The index starts out as the number of the instruction from the start of
192  /// the block.
193  DenseMap<const Instruction *, unsigned> InstNumbers;
194
195public:
196
197  /// This code only looks at accesses to allocas.
198  static bool isInterestingInstruction(const Instruction *I) {
199    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
200           (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
201  }
202
203  /// Get or calculate the index of the specified instruction.
204  unsigned getInstructionIndex(const Instruction *I) {
205    assert(isInterestingInstruction(I) &&
206           "Not a load/store to/from an alloca?");
207
208    // If we already have this instruction number, return it.
209    DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
210    if (It != InstNumbers.end())
211      return It->second;
212
213    // Scan the whole block to get the instruction.  This accumulates
214    // information for every interesting instruction in the block, in order to
215    // avoid gratuitus rescans.
216    const BasicBlock *BB = I->getParent();
217    unsigned InstNo = 0;
218    for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E;
219         ++BBI)
220      if (isInterestingInstruction(BBI))
221        InstNumbers[BBI] = InstNo++;
222    It = InstNumbers.find(I);
223
224    assert(It != InstNumbers.end() && "Didn't insert instruction?");
225    return It->second;
226  }
227
228  void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
229
230  void clear() { InstNumbers.clear(); }
231};
232
struct PromoteMem2Reg {
  /// The alloca instructions being promoted.
  std::vector<AllocaInst *> Allocas;
  /// Dominator tree of the function containing the allocas.
  DominatorTree &DT;
  /// Used to create dbg.value intrinsics when converting dbg.declares.
  DIBuilder DIB;

  /// An AliasSetTracker object to update.  If null, don't update it.
  AliasSetTracker *AST;

  /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
  AssumptionCache *AC;

  /// Reverse mapping of Allocas.
  DenseMap<AllocaInst *, unsigned> AllocaLookup;

  /// \brief The PhiNodes we're adding.
  ///
  /// That map is used to simplify some Phi nodes as we iterate over it, so
  /// it should have deterministic iterators.  We could use a MapVector, but
  /// since we already maintain a map from BasicBlock* to a stable numbering
  /// (BBNumbers), the DenseMap is more efficient (also supports removal).
  ///
  /// NOTE(review): the pair key appears to combine a stable block number
  /// with an alloca index — confirm against QueuePhiNode.
  DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;

  /// For each PHI node, keep track of which entry in Allocas it corresponds
  /// to.
  DenseMap<PHINode *, unsigned> PhiToAllocaMap;

  /// If we are updating an AliasSetTracker, then for each alloca that is of
  /// pointer type, we keep track of what to copyValue to the inserted PHI
  /// nodes here.
  std::vector<Value *> PointerAllocaValues;

  /// For each alloca, we keep track of the dbg.declare intrinsic that
  /// describes it, if any, so that we can convert it to a dbg.value
  /// intrinsic if the alloca gets promoted.
  SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares;

  /// The set of basic blocks the renamer has already visited.
  ///
  SmallPtrSet<BasicBlock *, 16> Visited;

  /// Contains a stable numbering of basic blocks to avoid non-determinstic
  /// behavior.
  DenseMap<BasicBlock *, unsigned> BBNumbers;

  /// Maps DomTreeNodes to their level in the dominator tree.
  DenseMap<DomTreeNode *, unsigned> DomLevels;

  /// Lazily compute the number of predecessors a block has.
  DenseMap<const BasicBlock *, unsigned> BBNumPreds;

public:
  PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                 AliasSetTracker *AST, AssumptionCache *AC)
      : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
        // The module is reached through the dom tree root's parent function.
        DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
        AST(AST), AC(AC) {}

  /// Promote every alloca in the Allocas list; see the out-of-line body.
  void run();

private:
  /// Swap-erase Allocas[AllocaIdx] with the last element; decrements
  /// AllocaIdx so the caller's ++ revisits the swapped-in slot.
  void RemoveFromAllocasList(unsigned &AllocaIdx) {
    Allocas[AllocaIdx] = Allocas.back();
    Allocas.pop_back();
    --AllocaIdx;
  }

  /// Return BB's predecessor count, cached in BBNumPreds.  DenseMap
  /// default-constructs missing entries to 0, so 0 serves as the
  /// "not computed yet" sentinel: the stored value is the real count + 1.
  unsigned getNumPreds(const BasicBlock *BB) {
    unsigned &NP = BBNumPreds[BB];
    if (NP == 0)
      NP = std::distance(pred_begin(BB), pred_end(BB)) + 1;
    return NP - 1;
  }

  void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                               AllocaInfo &Info);
  void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                           const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
                           SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
  void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                  RenamePassData::ValVector &IncVals,
                  std::vector<RenamePassData> &Worklist);
  bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};
317
318} // end of anonymous namespace
319
320static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
321  // Knowing that this alloca is promotable, we know that it's safe to kill all
322  // instructions except for load and store.
323
324  for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) {
325    Instruction *I = cast<Instruction>(*UI);
326    ++UI;
327    if (isa<LoadInst>(I) || isa<StoreInst>(I))
328      continue;
329
330    if (!I->getType()->isVoidTy()) {
331      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
332      // Follow the use/def chain to erase them now instead of leaving it for
333      // dead code elimination later.
334      for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) {
335        Instruction *Inst = cast<Instruction>(*UUI);
336        ++UUI;
337        Inst->eraseFromParent();
338      }
339    }
340    I->eraseFromParent();
341  }
342}
343
/// \brief Rewrite as many loads as possible given a single store.
///
/// When there is only a single store, we can use the domtree to trivially
/// replace all of the dominated loads with the stored value. Do so, and return
/// true if this has successfully promoted the alloca entirely. If this returns
/// false there were some loads which were not dominated by the single store
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     DominatorTree &DT,
                                     AliasSetTracker *AST) {
  StoreInst *OnlyStore = Info.OnlyStore;
  // A stored value that is not an Instruction (argument, constant, global)
  // is available on entry, so all dominance checks can be skipped for it.
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1; // Intra-block index of the store; computed lazily.

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
    // Advance the iterator before the user is potentially erased below.
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca.  We can do this if the value is
    // dominated by the store.  If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) { // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first.  If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }

      } else if (LI->getParent() != StoreBB &&
                 !DT.dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships.  If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(ReplVal);
    // Keep the alias-set tracker and the block-index cache in sync with IR.
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Finally, after the scan, check to see if the store is all that is left.
  if (!Info.UsingBlocks.empty())
    return false; // If not, we'll have to fall back for the remainder.

  // Record debuginfo for the store and remove the declaration's
  // debuginfo.
  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
    DIBuilder DIB(*AI->getParent()->getParent()->getParent(),
                  /*AllowUnresolved*/ false);
    ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
    DDI->eraseFromParent();
    LBI.deleteValue(DDI);
  }
  // Remove the (now dead) store and alloca.
  Info.OnlyStore->eraseFromParent();
  LBI.deleteValue(Info.OnlyStore);

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);
  return true;
}
436
/// Many allocas are only used within a single basic block.  If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
/// using the Alloca.
///
/// Loads that occur before any store are rewritten to undef.  This is
/// necessary in cases where, due to control flow, the
/// alloca is potentially undefined on some control flow paths.  e.g. code like
/// this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     AliasSetTracker *AST) {
  // The trickiest case to handle is when we have large blocks. Because of this,
  // this code is optimized assuming that large blocks happen.  This does not
  // significantly pessimize the small block case.  This uses LargeBlockInfo to
  // make it efficient to get the index of various operations in the block.

  // Walk the use-def list of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;

  for (User *U : AI->users())
    if (StoreInst *SI = dyn_cast<StoreInst>(U))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
    // Advance the iterator first; the load may be erased below.
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI)
      continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has a lower index than this load.
    // lower_bound yields the first store at-or-after the load's index, so
    // the previous entry (if any) is the closest store preceding the load.
    StoresByIndexTy::iterator I =
        std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                         std::make_pair(LoadIdx,
                                        static_cast<StoreInst *>(nullptr)),
                         less_first());

    if (I == StoresByIndex.begin())
      // If there is no store before this load, the load takes the undef value.
      LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
    else
      // Otherwise, there was a store before this load, the load takes its value.
      LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));

    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Remove the (now dead) stores and alloca.
  while (!AI->use_empty()) {
    StoreInst *SI = cast<StoreInst>(AI->user_back());
    // Record debuginfo for the store before removing it.
    if (DbgDeclareInst *DDI = Info.DbgDeclare) {
      DIBuilder DIB(*AI->getParent()->getParent()->getParent(),
                    /*AllowUnresolved*/ false);
      ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
    }
    SI->eraseFromParent();
    LBI.deleteValue(SI);
  }

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);

  // The alloca's debuginfo can be removed as well.
  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
    DDI->eraseFromParent();
    LBI.deleteValue(DDI);
  }

  ++NumLocalPromoted;
}
525
/// Promote every alloca in Allocas, in phases:
///  1. Trivial cases handled without the full SSA algorithm: dead allocas,
///     single-store allocas, and single-block allocas.
///  2. PHI placement (DetermineInsertionPoint) for the remaining allocas.
///  3. A worklist-driven rename pass rewriting loads/stores to SSA values.
///  4. Cleanup: erase the allocas, simplify away redundant PHIs, and patch
///     PHIs reached from unrenamed (unreachable) predecessors with undef.
void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST)
    PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    removeLifetimeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST)
        AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);
        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      promoteSingleBlockAlloca(AI, Info, LBI, AST);

      // The alloca has been processed, move on.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }

    // If we haven't computed dominator tree levels, do so now.
    // Levels are assigned by a simple traversal from the root: each child is
    // one level deeper than its parent.
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode *, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;
      Worklist.push_back(Root);

      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;
          Worklist.push_back(*CI);
        }
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare)
      AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), nullptr, Values));
  do {
    RenamePassData RPD;
    // Swap the entry out of the worklist to avoid copying its value vector.
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree. Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST)
      AST->deleteValue(A);
    A->eraseFromParent();
  }

  const DataLayout &DL = F.getParent()->getDataLayout();

  // Remove alloca's dbg.declare instrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go.  If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
             I = NewPhiNodes.begin(),
             E = NewPhiNodes.end();
         I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        // Post-increment hands erase() the old iterator while I safely
        // advances past the removed entry.
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
           I = NewPhiNodes.begin(),
           E = NewPhiNodes.end();
       I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if there the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
          Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}
770
771/// \brief Determine which blocks the value is live in.
772///
773/// These are blocks which lead to uses.  Knowing this allows us to avoid
774/// inserting PHI nodes into blocks which don't lead to uses (thus, the
775/// inserted phi nodes would be dead).
776void PromoteMem2Reg::ComputeLiveInBlocks(
777    AllocaInst *AI, AllocaInfo &Info,
778    const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
779    SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
780
781  // To determine liveness, we must iterate through the predecessors of blocks
782  // where the def is live.  Blocks are added to the worklist if we need to
783  // check their predecessors.  Start with all the using blocks.
784  SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
785                                                    Info.UsingBlocks.end());
786
787  // If any of the using blocks is also a definition block, check to see if the
788  // definition occurs before or after the use.  If it happens before the use,
789  // the value isn't really live-in.
790  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
791    BasicBlock *BB = LiveInBlockWorklist[i];
792    if (!DefBlocks.count(BB))
793      continue;
794
795    // Okay, this is a block that both uses and defines the value.  If the first
796    // reference to the alloca is a def (store), then we know it isn't live-in.
797    for (BasicBlock::iterator I = BB->begin();; ++I) {
798      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
799        if (SI->getOperand(1) != AI)
800          continue;
801
802        // We found a store to the alloca before a load.  The alloca is not
803        // actually live-in here.
804        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
805        LiveInBlockWorklist.pop_back();
806        --i, --e;
807        break;
808      }
809
810      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
811        if (LI->getOperand(0) != AI)
812          continue;
813
814        // Okay, we found a load before a store to the alloca.  It is actually
815        // live into this block.
816        break;
817      }
818    }
819  }
820
821  // Now that we have a set of blocks where the phi is live-in, recursively add
822  // their predecessors until we find the full region the value is live.
823  while (!LiveInBlockWorklist.empty()) {
824    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
825
826    // The block really is live in here, insert it into the set.  If already in
827    // the set, then it has already been processed.
828    if (!LiveInBlocks.insert(BB).second)
829      continue;
830
831    // Since the value is live into BB, it is either defined in a predecessor or
832    // live into it to.  Add the preds to the worklist unless they are a
833    // defining block.
834    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
835      BasicBlock *P = *PI;
836
837      // The value is not live into a predecessor if it defines the value.
838      if (DefBlocks.count(P))
839        continue;
840
841      // Otherwise it is, add to the worklist.
842      LiveInBlockWorklist.push_back(P);
843    }
844  }
845}
846
/// At this point, we're committed to promoting the alloca using IDF's, and the
/// standard SSA construction algorithm.  Determine which blocks need phi nodes
/// and see if we can optimize out some work by avoiding insertion of dead phi
/// nodes.
///
/// This implements the Sreedhar-Gao algorithm: process definition blocks from
/// the deepest dominator-tree level upwards, and for each one scan its entire
/// dominator subtree for CFG edges that leave the subtree "upwards" (to a
/// level <= the root's level) -- those targets form the dominance frontier.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock *, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards.  less_second
  // compares the cached level, so top() always yields a deepest node.
  typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
                              less_second> IDFPriorityQueue;
  IDFPriorityQueue PQ;

  // Seed the queue with every reachable block containing a def of the alloca.
  // (getNode() returns null for unreachable blocks, which are skipped.)
  for (BasicBlock *BB : DefBlocks) {
    if (DomTreeNode *Node = DT.getNode(BB))
      PQ.push(std::make_pair(Node, DomLevels[Node]));
  }

  // DFBlocks accumulates (block number, block) pairs so the final phi
  // insertion order can be made deterministic by sorting.  VisitedPQ ensures
  // each frontier node is recorded/re-queued at most once across the whole
  // computation; VisitedWorklist guards the per-root subtree walk.
  SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
  SmallVector<DomTreeNode *, 32> Worklist;
  SmallPtrSet<DomTreeNode *, 32> VisitedPQ;
  SmallPtrSet<DomTreeNode *, 32> VisitedWorklist;

  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    PQ.pop();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dominator tree children of Root, inspecting their CFG edges with
    // targets elsewhere on the dominator tree. Only targets whose level is at
    // most Root's level are added to the iterated dominance frontier of the
    // definition set.

    Worklist.clear();
    Worklist.push_back(Root);
    VisitedWorklist.insert(Root);

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      BasicBlock *BB = Node->getBlock();

      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        DomTreeNode *SuccNode = DT.getNode(*SI);

        // Quickly skip all CFG edges that are also dominator tree edges instead
        // of catching them below.
        if (SuccNode->getIDom() == Node)
          continue;

        // A successor strictly deeper than Root cannot be in Root's dominance
        // frontier.
        unsigned SuccLevel = DomLevels[SuccNode];
        if (SuccLevel > RootLevel)
          continue;

        if (!VisitedPQ.insert(SuccNode).second)
          continue;

        // Skip frontier blocks where the value is not live-in: a phi there
        // would be dead.  This is what makes the resulting SSA "pruned".
        BasicBlock *SuccBB = SuccNode->getBlock();
        if (!LiveInBlocks.count(SuccBB))
          continue;

        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
        // The inserted phi is itself a new def, so iterate by queueing SuccBB
        // -- unless it was already a defining block and thus already queued.
        if (!DefBlocks.count(SuccBB))
          PQ.push(std::make_pair(SuccNode, SuccLevel));
      }

      // Continue the subtree walk through Root's dominator-tree children.
      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
           ++CI) {
        if (VisitedWorklist.insert(*CI).second)
          Worklist.push_back(*CI);
      }
    }
  }

  // Sort by block number so phi nodes are created in a deterministic order
  // independent of pointer values.
  if (DFBlocks.size() > 1)
    std::sort(DFBlocks.begin(), DFBlocks.end());

  unsigned CurrentVersion = 0;
  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}
938
939/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
940///
941/// Returns true if there wasn't already a phi-node for that variable
942bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
943                                  unsigned &Version) {
944  // Look up the basic-block in question.
945  PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];
946
947  // If the BB already has a phi node added for the i'th alloca then we're done!
948  if (PN)
949    return false;
950
951  // Create a PhiNode using the dereferenced type... and add the phi-node to the
952  // BasicBlock.
953  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
954                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),
955                       BB->begin());
956  ++NumPHIInsert;
957  PhiToAllocaMap[PN] = AllocaNo;
958
959  if (AST && PN->getType()->isPointerTy())
960    AST->copyValue(PointerAllocaValues[AllocaNo], PN);
961
962  return true;
963}
964
/// \brief Recursively traverse the CFG of the function, renaming loads and
/// stores to the allocas which we are promoting.
///
/// IncomingVals indicates what value each Alloca contains on exit from the
/// predecessor block Pred.
///
/// Despite the name, the traversal is iterative: the first successor of each
/// block is followed via the goto below and the remaining successors are
/// pushed on Worklist, avoiding deep native recursion on large CFGs.
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in the
  // block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed in
      // the IR before mem2reg was run.  We determine that APN is being inserted
      // because it is missing incoming edges.  All other PHI nodes being
      // inserted by this pass of mem2reg will have the same number of incoming
      // operands so far.  Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      // Pred may reach BB through several edges (e.g. a switch with multiple
      // cases branching to BB); each edge needs its own phi entry.
      unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB);
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (!APN)
          break;

        // Verify that it is missing entries.  If not, it is not being inserted
        // by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.  Note: the phi-updating section above runs even for
  // visited blocks, because EVERY incoming edge must contribute phi entries;
  // only the load/store rewriting below happens exactly once per block.
  if (!Visited.insert(BB).second)
    return;

  // Rewrite the block's loads and stores.  The iterator is advanced before
  // inspecting I so that erasing I does not invalidate the traversal.
  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src)
        continue;

      // Only loads from allocas selected for promotion are rewritten.
      DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end())
        continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && LI->getType()->isPointerTy())
        AST->deleteValue(LI);
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // what value were we writing?
      IncomingVals[ai->second] = SI->getOperand(0);
      // Record debuginfo for the store before removing it.
      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E)
    return;

  // Keep track of the successors so we don't visit the same successor twice
  SmallPtrSet<BasicBlock *, 8> VisitedSuccs;

  // Handle the first successor without using the worklist.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  // Queue the remaining distinct successors; each gets a copy of the current
  // IncomingVals captured inside the RenamePassData.
  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I).second)
      Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));

  goto NextIteration;
}
1078
1079void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
1080                           AliasSetTracker *AST, AssumptionCache *AC) {
1081  // If there is nothing to do, bail out...
1082  if (Allocas.empty())
1083    return;
1084
1085  PromoteMem2Reg(Allocas, DT, AST, AC).run();
1086}
1087