ScalarReplAggregates.cpp revision c9ecd14cee020f884313b60f8696384d3e7848f7
1//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This transformation implements the well known scalar replacement of
11// aggregates transformation.  This xform breaks up alloca instructions of
12// aggregate type (structure or array) into individual alloca instructions for
13// each member (if possible).  Then, if possible, it transforms the individual
14// alloca instructions into nice clean scalar SSA form.
15//
16// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
17// they often interact, especially for C++ programs.  As such, iterating between
18// SRoA and Mem2Reg until we run out of things to promote works well.
19//
20//===----------------------------------------------------------------------===//
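
// For illustration only (a hypothetical example, not taken from any test
// case), an aggregate alloca such as:
//
//   %agg = alloca { i32, i32 }
//   %f0  = getelementptr { i32, i32 }* %agg, i32 0, i32 0
//   store i32 %x, i32* %f0
//   %v   = load i32* %f0
//
// is first split into one scalar alloca per member (%agg.0 and %agg.1), and
// each of those scalar allocas is then promoted to SSA values by mem2reg,
// eliminating the memory traffic entirely.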
21
22#define DEBUG_TYPE "scalarrepl"
23#include "llvm/Transforms/Scalar.h"
24#include "llvm/Constants.h"
25#include "llvm/DerivedTypes.h"
26#include "llvm/Function.h"
27#include "llvm/GlobalVariable.h"
28#include "llvm/Instructions.h"
29#include "llvm/IntrinsicInst.h"
30#include "llvm/LLVMContext.h"
31#include "llvm/Module.h"
32#include "llvm/Pass.h"
33#include "llvm/Analysis/Dominators.h"
34#include "llvm/Analysis/Loads.h"
35#include "llvm/Analysis/ValueTracking.h"
36#include "llvm/Target/TargetData.h"
37#include "llvm/Transforms/Utils/PromoteMemToReg.h"
38#include "llvm/Transforms/Utils/Local.h"
39#include "llvm/Transforms/Utils/SSAUpdater.h"
40#include "llvm/Support/CallSite.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/ErrorHandling.h"
43#include "llvm/Support/GetElementPtrTypeIterator.h"
44#include "llvm/Support/IRBuilder.h"
45#include "llvm/Support/MathExtras.h"
46#include "llvm/Support/raw_ostream.h"
47#include "llvm/ADT/SetVector.h"
48#include "llvm/ADT/SmallVector.h"
49#include "llvm/ADT/Statistic.h"
50using namespace llvm;
51
52STATISTIC(NumReplaced,  "Number of allocas broken up");
53STATISTIC(NumPromoted,  "Number of allocas promoted");
54STATISTIC(NumAdjusted,  "Number of scalar allocas adjusted to allow promotion");
55STATISTIC(NumConverted, "Number of aggregates converted to scalar");
56STATISTIC(NumGlobals,   "Number of allocas copied from constant global");
57
58namespace {
59  struct SROA : public FunctionPass {
60    SROA(int T, bool hasDT, char &ID)
61      : FunctionPass(ID), HasDomTree(hasDT) {
62      if (T == -1)
63        SRThreshold = 128;
64      else
65        SRThreshold = T;
66    }
67
68    bool runOnFunction(Function &F);
69
70    bool performScalarRepl(Function &F);
71    bool performPromotion(Function &F);
72
73  private:
74    bool HasDomTree;
75    TargetData *TD;
76
77    /// DeadInsts - Keep track of instructions we have made dead, so that
78    /// we can remove them after we are done working.
79    SmallVector<Value*, 32> DeadInsts;
80
81    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
82    /// information about the uses.  All these fields are initialized to false
83    /// and set to true when something is learned.
84    struct AllocaInfo {
85      /// The alloca to promote.
86      AllocaInst *AI;
87
88      /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
89      /// looping and avoid redundant work.
90      SmallPtrSet<PHINode*, 8> CheckedPHIs;
91
92      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
93      bool isUnsafe : 1;
94
95      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
96      bool isMemCpySrc : 1;
97
98      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
99      bool isMemCpyDst : 1;
100
101      /// hasSubelementAccess - This is true if a subelement of the alloca is
102      /// ever accessed, or false if the alloca is only accessed with mem
103      /// intrinsics or loads/stores that access the entire alloca at once.
104      bool hasSubelementAccess : 1;
105
106      /// hasALoadOrStore - This is true if there are any loads or stores to it.
107      /// The alloca may just be accessed with memcpy, for example, which would
108      /// not set this.
109      bool hasALoadOrStore : 1;
110
111      explicit AllocaInfo(AllocaInst *ai)
112        : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
113          hasSubelementAccess(false), hasALoadOrStore(false) {}
114    };
115
116    unsigned SRThreshold;
117
118    void MarkUnsafe(AllocaInfo &I, Instruction *User) {
119      I.isUnsafe = true;
120      DEBUG(dbgs() << "  Transformation preventing inst: " << *User << '\n');
121    }
122
123    bool isSafeAllocaToScalarRepl(AllocaInst *AI);
124
125    void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
126    void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
127                                         AllocaInfo &Info);
128    void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
129    void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
130                         const Type *MemOpType, bool isStore, AllocaInfo &Info,
131                         Instruction *TheAccess, bool AllowWholeAccess);
132    bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
133    uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
134                                  const Type *&IdxTy);
135
136    void DoScalarReplacement(AllocaInst *AI,
137                             std::vector<AllocaInst*> &WorkList);
138    void DeleteDeadInstructions();
139
140    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
141                              SmallVector<AllocaInst*, 32> &NewElts);
142    void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
143                        SmallVector<AllocaInst*, 32> &NewElts);
144    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
145                    SmallVector<AllocaInst*, 32> &NewElts);
146    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
147                                      AllocaInst *AI,
148                                      SmallVector<AllocaInst*, 32> &NewElts);
149    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
150                                       SmallVector<AllocaInst*, 32> &NewElts);
151    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
152                                      SmallVector<AllocaInst*, 32> &NewElts);
153
154    static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
155  };
156
157  // SROA_DT - SROA that uses DominatorTree.
158  struct SROA_DT : public SROA {
159    static char ID;
160  public:
161    SROA_DT(int T = -1) : SROA(T, true, ID) {
162      initializeSROA_DTPass(*PassRegistry::getPassRegistry());
163    }
164
165    // getAnalysisUsage - This pass requires DominatorTree, but we know it will
166    // not alter the CFG, so say so.
167    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
168      AU.addRequired<DominatorTree>();
169      AU.setPreservesCFG();
170    }
171  };
172
173  // SROA_SSAUp - SROA that uses SSAUpdater.
174  struct SROA_SSAUp : public SROA {
175    static char ID;
176  public:
177    SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
178      initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
179    }
180
181    // getAnalysisUsage - This pass does not require any passes, but we know it
182    // will not alter the CFG, so say so.
183    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
184      AU.setPreservesCFG();
185    }
186  };
187
188}
189
190char SROA_DT::ID = 0;
191char SROA_SSAUp::ID = 0;
192
193INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
194                "Scalar Replacement of Aggregates (DT)", false, false)
195INITIALIZE_PASS_DEPENDENCY(DominatorTree)
196INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
197                "Scalar Replacement of Aggregates (DT)", false, false)
198
199INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
200                      "Scalar Replacement of Aggregates (SSAUp)", false, false)
201INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
202                    "Scalar Replacement of Aggregates (SSAUp)", false, false)
203
204// Public interface to the ScalarReplAggregates pass
205FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
206                                                   bool UseDomTree) {
207  if (UseDomTree)
208    return new SROA_DT(Threshold);
209  return new SROA_SSAUp(Threshold);
210}
211
212
213//===----------------------------------------------------------------------===//
214// Convert To Scalar Optimization.
215//===----------------------------------------------------------------------===//
216
217namespace {
218/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
219/// optimization, which scans the uses of an alloca and determines if it can
220/// rewrite it in terms of a single new alloca that can be mem2reg'd.
221class ConvertToScalarInfo {
222  /// AllocaSize - The size of the alloca being considered.
223  unsigned AllocaSize;
224  const TargetData &TD;
225
226  /// IsNotTrivial - This is set to true if there is some access to the object
227  /// which means that mem2reg can't promote it.
228  bool IsNotTrivial;
229
230  /// VectorTy - This tracks the vector type that we should promote the alloca
231  /// to if it is possible to turn it into a vector.  This starts out null, and
232  /// if it isn't possible to turn into a vector type, it gets set to VoidTy.
233  const Type *VectorTy;
234
235  /// HadAVector - True if there is at least one vector access to the alloca.
236  /// We don't want to turn random arrays into vectors and use vector element
237  /// insert/extract, but if there are element accesses to something that is
238  /// also declared as a vector, we do want to promote to a vector.
239  bool HadAVector;
240
241public:
242  explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
243    : AllocaSize(Size), TD(td) {
244    IsNotTrivial = false;
245    VectorTy = 0;
246    HadAVector = false;
247  }
248
249  AllocaInst *TryConvert(AllocaInst *AI);
250
251private:
252  bool CanConvertToScalar(Value *V, uint64_t Offset);
253  void MergeInType(const Type *In, uint64_t Offset);
254  bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
255  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
256
257  Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
258                                    uint64_t Offset, IRBuilder<> &Builder);
259  Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
260                                   uint64_t Offset, IRBuilder<> &Builder);
261};
262} // end anonymous namespace.
263
264
265/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
266/// rewrite it to be a new alloca which is mem2reg'able.  This returns the new
267/// alloca if possible or null if not.
268AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
269  // If we can't convert this scalar, or if mem2reg can trivially do it, bail
270  // out.
271  if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
272    return 0;
273
274  // If we were able to find a vector type that can handle this with
275  // insert/extract elements, and if there was at least one use that had
276  // a vector type, promote this to a vector.  We don't want to promote
277  // random stuff that doesn't use vectors (e.g. <9 x double>) because then
278  // we just get a lot of insert/extracts.  If at least one vector is
279  // involved, then we probably really do have a union of vector/array.
280  const Type *NewTy;
281  if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
282    DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
283          << *VectorTy << '\n');
284    NewTy = VectorTy;  // Use the vector type.
285  } else {
286    DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
287    // Create and insert the integer alloca.
288    NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
289  }
290  AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
291  ConvertUsesToScalar(AI, NewAI, 0);
292  return NewAI;
293}
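
// As a rough sketch (hypothetical IR, assuming an 8-byte { float, float }
// alloca with no vector-typed uses):
//
//   %u = alloca { float, float }
//   %p = bitcast { float, float }* %u to i64*
//   store i64 %bits, i64* %p
//   %f = getelementptr { float, float }* %u, i32 0, i32 1
//   %v = load float* %f
//
// TryConvert creates a single "alloca i64", and ConvertUsesToScalar rewrites
// the i64 store directly and the float load as a lshr/trunc/bitcast of the
// loaded i64, after which mem2reg can promote the new alloca.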
294
295/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
296/// so far at the offset specified by Offset (which is specified in bytes).
297///
298/// There are two cases we handle here:
299///   1) A union of vector types of the same size and potentially its elements.
300///      Here we turn element accesses into insert/extract element operations.
301///      This promotes a <4 x float> with a store of float to the third element
302///      into a <4 x float> that uses insert element.
303///   2) A fully general blob of memory, which we turn into some (potentially
304///      large) integer type with extract and insert operations where the loads
305///      and stores would mutate the memory.  We mark this by setting VectorTy
306///      to VoidTy.
307void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
308  // If we already decided to turn this into a blob of integer memory, there is
309  // nothing to be done.
310  if (VectorTy && VectorTy->isVoidTy())
311    return;
312
313  // If this could be contributing to a vector, analyze it.
314
315  // If the In type is a vector that is the same size as the alloca, see if it
316  // matches the existing VecTy.
317  if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
318    if (MergeInVectorType(VInTy, Offset))
319      return;
320  } else if (In->isFloatTy() || In->isDoubleTy() ||
321             (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
322              isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
323    // If we're accessing something that could be an element of a vector, see
324    // if the implied vector agrees with what we already have and if Offset is
325    // compatible with it.
326    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
327    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
328        (VectorTy == 0 ||
329         cast<VectorType>(VectorTy)->getElementType()
330               ->getPrimitiveSizeInBits()/8 == EltSize)) {
331      if (VectorTy == 0)
332        VectorTy = VectorType::get(In, AllocaSize/EltSize);
333      return;
334    }
335  }
336
337  // Otherwise, we have a case that we can't handle with an optimized vector
338  // form.  We can still turn this into a large integer.
339  VectorTy = Type::getVoidTy(In->getContext());
340}
341
342/// MergeInVectorType - Handles the vector case of MergeInType, returning true
343/// if the type was successfully merged and false otherwise.
344bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
345                                            uint64_t Offset) {
346  // Remember if we saw a vector type.
347  HadAVector = true;
348
349  if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
350    // If we're storing/loading a vector of the right size, allow it as a
351    // vector.  If this is the first vector we see, remember the type so that
352    // we know the element size.  If this is a subsequent access, ignore it
353    // even if it has a differing type of the same size.  Worst case we can
354    // bitcast the resultant vectors.
355    if (VectorTy == 0)
356      VectorTy = VInTy;
357    return true;
358  }
359
360  return false;
361}
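
// For example (hypothetically), a 16-byte alloca that is accessed both as
// <4 x float> and as <2 x double> keeps whichever vector type it saw first;
// accesses using the other type are later handled by bitcasting the whole
// vector value.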
362
363/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
364/// its accesses to a single vector type, return true and set VectorTy to
365/// the new type.  If we could convert the alloca into a single promotable
366/// integer, return true but set VectorTy to VoidTy.  Further, if the use is not a
367/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
368/// is the current offset from the base of the alloca being analyzed.
369///
370/// If we see at least one access to the value as a vector type, set the
371/// HadAVector flag.
372bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
373  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
374    Instruction *User = cast<Instruction>(*UI);
375
376    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
377      // Don't break volatile loads.
378      if (LI->isVolatile())
379        return false;
380      // Don't touch MMX operations.
381      if (LI->getType()->isX86_MMXTy())
382        return false;
383      MergeInType(LI->getType(), Offset);
384      continue;
385    }
386
387    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
388      // Storing the pointer, not into the value?
389      if (SI->getOperand(0) == V || SI->isVolatile()) return false;
390      // Don't touch MMX operations.
391      if (SI->getOperand(0)->getType()->isX86_MMXTy())
392        return false;
393      MergeInType(SI->getOperand(0)->getType(), Offset);
394      continue;
395    }
396
397    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
398      IsNotTrivial = true;  // Can't be mem2reg'd.
399      if (!CanConvertToScalar(BCI, Offset))
400        return false;
401      continue;
402    }
403
404    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
405      // If this is a GEP with variable indices, we can't handle it.
406      if (!GEP->hasAllConstantIndices())
407        return false;
408
409      // Compute the offset that this GEP adds to the pointer.
410      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
411      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
412                                               &Indices[0], Indices.size());
413      // See if all uses can be converted.
414      if (!CanConvertToScalar(GEP, Offset+GEPOffset))
415        return false;
416      IsNotTrivial = true;  // Can't be mem2reg'd.
417      continue;
418    }
419
420    // If this is a constant sized memset of a constant value (e.g. 0) we can
421    // handle it.
422    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
423      // Store of constant value and constant size.
424      if (!isa<ConstantInt>(MSI->getValue()) ||
425          !isa<ConstantInt>(MSI->getLength()))
426        return false;
427      IsNotTrivial = true;  // Can't be mem2reg'd.
428      continue;
429    }
430
431    // If this is a memcpy or memmove into or out of the whole allocation, we
432    // can handle it like a load or store of the scalar type.
433    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
434      ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
435      if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
436        return false;
437
438      IsNotTrivial = true;  // Can't be mem2reg'd.
439      continue;
440    }
441
442    // Otherwise, we cannot handle this!
443    return false;
444  }
445
446  return true;
447}
448
449/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
450/// directly.  This happens when we are converting an "integer union" to a
451/// single integer scalar, or when we are converting a "vector union" to a
452/// vector with insert/extractelement instructions.
453///
454/// Offset is an offset from the original alloca, in bits that need to be
455/// shifted to the right.  By the end of this, there should be no uses of Ptr.
456void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
457                                              uint64_t Offset) {
458  while (!Ptr->use_empty()) {
459    Instruction *User = cast<Instruction>(Ptr->use_back());
460
461    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
462      ConvertUsesToScalar(CI, NewAI, Offset);
463      CI->eraseFromParent();
464      continue;
465    }
466
467    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
468      // Compute the offset that this GEP adds to the pointer.
469      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
470      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
471                                               &Indices[0], Indices.size());
472      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
473      GEP->eraseFromParent();
474      continue;
475    }
476
477    IRBuilder<> Builder(User);
478
479    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
480      // The load is a bit extract from NewAI shifted right by Offset bits.
481      Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
482      Value *NewLoadVal
483        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
484      LI->replaceAllUsesWith(NewLoadVal);
485      LI->eraseFromParent();
486      continue;
487    }
488
489    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
490      assert(SI->getOperand(0) != Ptr && "Consistency error!");
491      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
492      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
493                                             Builder);
494      Builder.CreateStore(New, NewAI);
495      SI->eraseFromParent();
496
497      // If the load we just inserted is now dead, then the inserted store
498      // overwrote the entire thing.
499      if (Old->use_empty())
500        Old->eraseFromParent();
501      continue;
502    }
503
504    // If this is a constant sized memset of a constant value (e.g. 0) we can
505    // transform it into a store of the expanded constant value.
506    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
507      assert(MSI->getRawDest() == Ptr && "Consistency error!");
508      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
509      if (NumBytes != 0) {
510        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
511
512        // Compute the value replicated the right number of times.
513        APInt APVal(NumBytes*8, Val);
514
515        // Splat the value if non-zero.
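        // (e.g. a 4-byte memset with Val == 0xAB produces APVal == 0xABABABAB)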
516        if (Val)
517          for (unsigned i = 1; i != NumBytes; ++i)
518            APVal |= APVal << 8;
519
520        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
521        Value *New = ConvertScalar_InsertValue(
522                                    ConstantInt::get(User->getContext(), APVal),
523                                               Old, Offset, Builder);
524        Builder.CreateStore(New, NewAI);
525
526        // If the load we just inserted is now dead, then the memset overwrote
527        // the entire thing.
528        if (Old->use_empty())
529          Old->eraseFromParent();
530      }
531      MSI->eraseFromParent();
532      continue;
533    }
534
535    // If this is a memcpy or memmove into or out of the whole allocation, we
536    // can handle it like a load or store of the scalar type.
537    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
538      assert(Offset == 0 && "must be store to start of alloca");
539
540      // If the source and destination both refer to the same alloca, then this is
541      // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
542      // as appropriate.
543      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
544
545      if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
546        // Dest must be OrigAI, change this to be a load from the original
547        // pointer (bitcasted), then a store to our new alloca.
548        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
549        Value *SrcPtr = MTI->getSource();
550        const PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
551        const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
552        if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
553          AIPTy = PointerType::get(AIPTy->getElementType(),
554                                   SPTy->getAddressSpace());
555        }
556        SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
557
558        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
559        SrcVal->setAlignment(MTI->getAlignment());
560        Builder.CreateStore(SrcVal, NewAI);
561      } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
562        // Src must be OrigAI, change this to be a load from NewAI then a store
563        // through the original dest pointer (bitcasted).
564        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
565        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
566
567        const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
568        const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
569        if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
570          AIPTy = PointerType::get(AIPTy->getElementType(),
571                                   DPTy->getAddressSpace());
572        }
573        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
574
575        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
576        NewStore->setAlignment(MTI->getAlignment());
577      } else {
578        // Noop transfer. Src == Dst
579      }
580
581      MTI->eraseFromParent();
582      continue;
583    }
584
585    llvm_unreachable("Unsupported operation!");
586  }
587}
588
589/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
590/// or vector value FromVal, extracting the bits from the offset specified by
591/// Offset.  This returns the value, which is of type ToType.
592///
593/// This happens when we are converting an "integer union" to a single
594/// integer scalar, or when we are converting a "vector union" to a vector with
595/// insert/extractelement instructions.
596///
597/// Offset is an offset from the original alloca, in bits that need to be
598/// shifted to the right.
599Value *ConvertToScalarInfo::
600ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
601                           uint64_t Offset, IRBuilder<> &Builder) {
602  // If the load is of the whole new alloca, no conversion is needed.
603  if (FromVal->getType() == ToType && Offset == 0)
604    return FromVal;
605
606  // If the result alloca is a vector type, this is either an element
607  // access or a bitcast to another vector type of the same size.
608  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
609    if (ToType->isVectorTy())
610      return Builder.CreateBitCast(FromVal, ToType, "tmp");
611
612    // Otherwise it must be an element access.
613    unsigned Elt = 0;
614    if (Offset) {
615      unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
616      Elt = Offset/EltSize;
617      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
618    }
619    // Return the element extracted out of it.
620    Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
621                    Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
622    if (V->getType() != ToType)
623      V = Builder.CreateBitCast(V, ToType, "tmp");
624    return V;
625  }
626
627  // If ToType is a first class aggregate, extract out each of the pieces and
628  // use insertvalue's to form the FCA.
629  if (const StructType *ST = dyn_cast<StructType>(ToType)) {
630    const StructLayout &Layout = *TD.getStructLayout(ST);
631    Value *Res = UndefValue::get(ST);
632    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
633      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
634                                        Offset+Layout.getElementOffsetInBits(i),
635                                              Builder);
636      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
637    }
638    return Res;
639  }
640
641  if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
642    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
643    Value *Res = UndefValue::get(AT);
644    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
645      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
646                                              Offset+i*EltSize, Builder);
647      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
648    }
649    return Res;
650  }
651
652  // Otherwise, this must be a union that was converted to an integer value.
653  const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
654
655  // If this is a big-endian system and the load is narrower than the
656  // full alloca type, we need to do a shift to get the right bits.
657  int ShAmt = 0;
658  if (TD.isBigEndian()) {
659    // On big-endian machines, the lowest bit is stored at the bit offset
660    // from the pointer given by getTypeStoreSizeInBits.  This matters for
661    // integers with a bitwidth that is not a multiple of 8.
662    ShAmt = TD.getTypeStoreSizeInBits(NTy) -
663            TD.getTypeStoreSizeInBits(ToType) - Offset;
664  } else {
665    ShAmt = Offset;
666  }
667
668  // Note: we support negative shift amounts (handled with shl), which are not
669  // defined.  We do this to support (e.g.) loads off the end of a structure
670  // where only some bits are used.
671  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
672    FromVal = Builder.CreateLShr(FromVal,
673                                 ConstantInt::get(FromVal->getType(),
674                                                           ShAmt), "tmp");
675  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
676    FromVal = Builder.CreateShl(FromVal,
677                                ConstantInt::get(FromVal->getType(),
678                                                          -ShAmt), "tmp");
679
680  // Finally, unconditionally truncate the integer to the right width.
681  unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
682  if (LIBitWidth < NTy->getBitWidth())
683    FromVal =
684      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
685                                                    LIBitWidth), "tmp");
686  else if (LIBitWidth > NTy->getBitWidth())
687    FromVal =
688       Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
689                                                    LIBitWidth), "tmp");
690
691  // If the result is an integer, this is a trunc or bitcast.
692  if (ToType->isIntegerTy()) {
693    // Should be done.
694  } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
695    // Just do a bitcast, we know the sizes match up.
696    FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
697  } else {
698    // Otherwise must be a pointer.
699    FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
700  }
701  assert(FromVal->getType() == ToType && "Didn't convert right?");
702  return FromVal;
703}
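
// A small worked example (illustrative, assuming a little-endian target):
// extracting an i16 at byte offset 2 from an i64-backed alloca uses
// ShAmt = 16, so the emitted sequence is roughly
//
//   %s = lshr i64 %FromVal, 16
//   %t = trunc i64 %s to i16
//
// On a big-endian target ShAmt is instead derived from the store sizes, as
// computed above.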
704
705/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
706/// or vector value "Old" at the offset specified by Offset.
707///
708/// This happens when we are converting an "integer union" to a
709/// single integer scalar, or when we are converting a "vector union" to a
710/// vector with insert/extractelement instructions.
711///
712/// Offset is an offset from the original alloca, in bits that need to be
713/// shifted to the right.
714Value *ConvertToScalarInfo::
715ConvertScalar_InsertValue(Value *SV, Value *Old,
716                          uint64_t Offset, IRBuilder<> &Builder) {
717  // Convert the stored type to the actual type, shift it left to insert
718  // then 'or' into place.
719  const Type *AllocaType = Old->getType();
720  LLVMContext &Context = Old->getContext();
721
722  if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
723    uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
724    uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
725
726    // Changing the whole vector with memset or with an access of a different
727    // vector type?
728    if (ValSize == VecSize)
729      return Builder.CreateBitCast(SV, AllocaType, "tmp");
730
731    uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
732
733    // Must be an element insertion.
734    unsigned Elt = Offset/EltSize;
735
736    if (SV->getType() != VTy->getElementType())
737      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
738
739    SV = Builder.CreateInsertElement(Old, SV,
740                     ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
741                                     "tmp");
742    return SV;
743  }
744
745  // If SV is a first-class aggregate value, insert each value recursively.
746  if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
747    const StructLayout &Layout = *TD.getStructLayout(ST);
748    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
749      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
750      Old = ConvertScalar_InsertValue(Elt, Old,
751                                      Offset+Layout.getElementOffsetInBits(i),
752                                      Builder);
753    }
754    return Old;
755  }
756
757  if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
758    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
759    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
760      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
761      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
762    }
763    return Old;
764  }
765
766  // If SV is a float, convert it to the appropriate integer type.
767  // If it is a pointer, do the same.
768  unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
769  unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
770  unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
771  unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
772  if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
773    SV = Builder.CreateBitCast(SV,
774                            IntegerType::get(SV->getContext(),SrcWidth), "tmp");
775  else if (SV->getType()->isPointerTy())
776    SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");
777
778  // Zero extend or truncate the value if needed.
779  if (SV->getType() != AllocaType) {
780    if (SV->getType()->getPrimitiveSizeInBits() <
781             AllocaType->getPrimitiveSizeInBits())
782      SV = Builder.CreateZExt(SV, AllocaType, "tmp");
783    else {
784      // Truncation may be needed if storing more than the alloca can hold
785      // (undefined behavior).
786      SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
787      SrcWidth = DestWidth;
788      SrcStoreWidth = DestStoreWidth;
789    }
790  }
791
792  // If this is a big-endian system and the store is narrower than the
793  // full alloca type, we need to do a shift to get the right bits.
794  int ShAmt = 0;
795  if (TD.isBigEndian()) {
796    // On big-endian machines, the lowest bit is stored at the bit offset
797    // from the pointer given by getTypeStoreSizeInBits.  This matters for
798    // integers with a bitwidth that is not a multiple of 8.
799    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
800  } else {
801    ShAmt = Offset;
802  }
803
804  // Note: we support negative shift amounts (handled with shr), which are not
805  // defined.  We do this to support (e.g.) stores off the end of a structure
806  // where only some bits in the structure are set.
807  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
808  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
809    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
810                           ShAmt), "tmp");
811    Mask <<= ShAmt;
812  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
813    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
814                            -ShAmt), "tmp");
815    Mask = Mask.lshr(-ShAmt);
816  }
817
818  // Mask out the bits we are about to insert from the old value, and or
819  // in the new bits.
820  if (SrcWidth != DestWidth) {
821    assert(DestWidth > SrcWidth);
822    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
823    SV = Builder.CreateOr(Old, SV, "ins");
824  }
825  return SV;
826}
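
// A small worked example (illustrative, assuming a little-endian target):
// inserting an i8 at bit offset 8 into an i32-backed alloca zero-extends the
// value, shifts it into position, masks those bits out of the old value, and
// ors the two together:
//
//   %v = zext i8 %SV to i32
//   %s = shl i32 %v, 8
//   %m = and i32 %Old, -65281        ; ~(0xFF << 8)
//   %r = or i32 %m, %s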
827
828
829//===----------------------------------------------------------------------===//
830// SRoA Driver
831//===----------------------------------------------------------------------===//
832
833
834bool SROA::runOnFunction(Function &F) {
835  TD = getAnalysisIfAvailable<TargetData>();
836
837  bool Changed = performPromotion(F);
838
839  // FIXME: ScalarRepl currently depends on TargetData more than it
840  // theoretically needs to. It should be refactored in order to support
841  // target-independent IR. Until this is done, just skip the actual
842  // scalar-replacement portion of this pass.
843  if (!TD) return Changed;
844
845  while (1) {
846    bool LocalChange = performScalarRepl(F);
847    if (!LocalChange) break;   // No need to repromote if no scalarrepl
848    Changed = true;
849    LocalChange = performPromotion(F);
850    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
851  }
852
853  return Changed;
854}
855
856namespace {
857class AllocaPromoter : public LoadAndStorePromoter {
858  AllocaInst *AI;
859public:
860  AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S)
861    : LoadAndStorePromoter(Insts, S), AI(0) {}
862
863  void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
864    // Remember which alloca we're promoting (for isInstInList).
865    this->AI = AI;
866    LoadAndStorePromoter::run(Insts);
867    AI->eraseFromParent();
868  }
869
870  virtual bool isInstInList(Instruction *I,
871                            const SmallVectorImpl<Instruction*> &Insts) const {
872    if (LoadInst *LI = dyn_cast<LoadInst>(I))
873      return LI->getOperand(0) == AI;
874    return cast<StoreInst>(I)->getPointerOperand() == AI;
875  }
876};
877} // end anon namespace
878
879/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
880/// subsequently loaded can be rewritten to load both input pointers and then
881/// select between the results, allowing the load of the alloca to be promoted.
882/// From this:
883///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other
884///   %V = load i32* %P2
885/// to:
886///   %V1 = load i32* %Alloca      -> will be mem2reg'd
887///   %V2 = load i32* %Other
888///   %V = select i1 %cond, i32 %V1, i32 %V2
889///
890/// We can do this to a select if its only uses are loads and if the operands
891/// to the select can be loaded unconditionally.
892static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
893  bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
894  bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
895
896  for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
897       UI != UE; ++UI) {
898    LoadInst *LI = dyn_cast<LoadInst>(*UI);
899    if (LI == 0 || LI->isVolatile()) return false;
900
901    // Both operands to the select need to be dereferenceable, either absolutely
902    // (e.g. allocas) or at this point because we can see other accesses to it.
903    if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
904                                                    LI->getAlignment(), TD))
905      return false;
906    if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
907                                                    LI->getAlignment(), TD))
908      return false;
909  }
910
911  return true;
912}
913
914/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
915/// subsequently loaded can be rewritten to load both input pointers in the pred
916/// blocks and then PHI the results, allowing the load of the alloca to be
917/// promoted.
918/// From this:
919///   %P2 = phi [i32* %Alloca, i32* %Other]
920///   %V = load i32* %P2
921/// to:
922///   %V1 = load i32* %Alloca      -> will be mem2reg'd
923///   ...
924///   %V2 = load i32* %Other
925///   ...
926///   %V = phi [i32 %V1, i32 %V2]
927///
928/// We can do this to a PHI if its only uses are loads and if the incoming
929/// pointers can each be loaded unconditionally.
930static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
931  // For now, we can only do this promotion if the load is in the same block as
932  // the PHI, and if there are no stores between the phi and load.
933  // TODO: Allow recursive phi users.
934  // TODO: Allow stores.
935  BasicBlock *BB = PN->getParent();
936  unsigned MaxAlign = 0;
937  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
938       UI != UE; ++UI) {
939    LoadInst *LI = dyn_cast<LoadInst>(*UI);
940    if (LI == 0 || LI->isVolatile()) return false;
941
942    // For now we only allow loads in the same block as the PHI.  This is a
943    // common case that happens when instcombine merges two loads through a PHI.
944    if (LI->getParent() != BB) return false;
945
946    // Ensure that there are no instructions between the PHI and the load that
947    // could store.
948    for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
949      if (BBI->mayWriteToMemory())
950        return false;
951
952    MaxAlign = std::max(MaxAlign, LI->getAlignment());
953  }
954
955  // Okay, we know that we have one or more loads in the same block as the PHI.
956  // We can transform this if it is safe to push the loads into the predecessor
957  // blocks.  The only thing to watch out for is that we can't put a possibly
958  // trapping load in the predecessor if it is a critical edge.
959  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
960    BasicBlock *Pred = PN->getIncomingBlock(i);
961
962    // If the predecessor has a single successor, then the edge isn't critical.
963    if (Pred->getTerminator()->getNumSuccessors() == 1)
964      continue;
965
966    Value *InVal = PN->getIncomingValue(i);
967
968    // If the InVal is an invoke in the pred, we can't put a load on the edge.
969    if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
970      if (II->getParent() == Pred)
971        return false;
972
973    // If this pointer is always safe to load, or if we can prove that there is
974    // already a load in the block, then we can move the load to the pred block.
975    if (InVal->isDereferenceablePointer() ||
976        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
977      continue;
978
979    return false;
980  }
981
982  return true;
983}
984
985
986/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
987/// direct (non-volatile) loads and stores to it.  If the alloca is close but
988/// not quite there, this will transform the code to allow promotion.  As such,
989/// it is a non-pure predicate.
990static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
991  SetVector<Instruction*, SmallVector<Instruction*, 4>,
992            SmallPtrSet<Instruction*, 4> > InstsToRewrite;
993
994  for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
995       UI != UE; ++UI) {
996    User *U = *UI;
997    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
998      if (LI->isVolatile())
999        return false;
1000      continue;
1001    }
1002
1003    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
1004      if (SI->getOperand(0) == AI || SI->isVolatile())
1005        return false;   // Don't allow a store OF the AI, only INTO the AI.
1006      continue;
1007    }
1008
1009    if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
1010      // If the condition being selected on is a constant, fold the select; yes,
1011      // this does (rarely) happen early on.
1012      if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
1013        Value *Result = SI->getOperand(1+CI->isZero());
1014        SI->replaceAllUsesWith(Result);
1015        SI->eraseFromParent();
1016
1017        // This is very rare and we just scrambled the use list of AI, start
1018        // over completely.
1019        return tryToMakeAllocaBePromotable(AI, TD);
1020      }
1021
1022      // If it is safe to turn "load (select c, AI, ptr)" into a select of two
1023      // loads, then we can transform this by rewriting the select.
1024      if (!isSafeSelectToSpeculate(SI, TD))
1025        return false;
1026
1027      InstsToRewrite.insert(SI);
1028      continue;
1029    }
1030
1031    if (PHINode *PN = dyn_cast<PHINode>(U)) {
1032      if (PN->use_empty()) {  // Dead PHIs can be stripped.
1033        InstsToRewrite.insert(PN);
1034        continue;
1035      }
1036
1037      // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
1038      // in the pred blocks, then we can transform this by rewriting the PHI.
1039      if (!isSafePHIToSpeculate(PN, TD))
1040        return false;
1041
1042      InstsToRewrite.insert(PN);
1043      continue;
1044    }
1045
1046    return false;
1047  }
1048
1049  // If there are no instructions to rewrite, then all uses are load/stores and
1050  // we're done!
1051  if (InstsToRewrite.empty())
1052    return true;
1053
1054  // If we have instructions that need to be rewritten for this to be promotable,
1055  // take care of it now.
1056  for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
1057    if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
1058      // Selects in InstsToRewrite only have load uses.  Rewrite each as two
1059      // loads with a new select.
1060      while (!SI->use_empty()) {
1061        LoadInst *LI = cast<LoadInst>(SI->use_back());
1062
1063        IRBuilder<> Builder(LI);
1064        LoadInst *TrueLoad =
1065          Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
1066        LoadInst *FalseLoad =
1067          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
1068
1069        // Transfer alignment and TBAA info if present.
1070        TrueLoad->setAlignment(LI->getAlignment());
1071        FalseLoad->setAlignment(LI->getAlignment());
1072        if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
1073          TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
1074          FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
1075        }
1076
1077        Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
1078        V->takeName(LI);
1079        LI->replaceAllUsesWith(V);
1080        LI->eraseFromParent();
1081      }
1082
1083      // Now that all the loads are gone, the select is gone too.
1084      SI->eraseFromParent();
1085      continue;
1086    }
1087
1088    // Otherwise, we have a PHI node which allows us to push the loads into the
1089    // predecessors.
1090    PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
1091    if (PN->use_empty()) {
1092      PN->eraseFromParent();
1093      continue;
1094    }
1095
1096    const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
1097    PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN);
1098
1099    // Get the TBAA tag and alignment to use from one of the loads.  It doesn't
1100    // matter which one we pick, even if they differ.
1101    LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
1102    MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
1103    unsigned Align = SomeLoad->getAlignment();
1104
1105    // Rewrite all loads of the PN to use the new PHI.
1106    while (!PN->use_empty()) {
1107      LoadInst *LI = cast<LoadInst>(PN->use_back());
1108      LI->replaceAllUsesWith(NewPN);
1109      LI->eraseFromParent();
1110    }
1111
1112    // Inject loads into all of the pred blocks.  Keep track of which blocks we
1113    // insert them into in case we have multiple edges from the same block.
1114    DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
1115
1116    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1117      BasicBlock *Pred = PN->getIncomingBlock(i);
1118      LoadInst *&Load = InsertedLoads[Pred];
1119      if (Load == 0) {
1120        Load = new LoadInst(PN->getIncomingValue(i),
1121                            PN->getName() + "." + Pred->getName(),
1122                            Pred->getTerminator());
1123        Load->setAlignment(Align);
1124        if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
1125      }
1126
1127      NewPN->addIncoming(Load, Pred);
1128    }
1129
1130    PN->eraseFromParent();
1131  }
1132
1133  ++NumAdjusted;
1134  return true;
1135}
1136
1137
1138bool SROA::performPromotion(Function &F) {
1139  std::vector<AllocaInst*> Allocas;
1140  DominatorTree *DT = 0;
1141  if (HasDomTree)
1142    DT = &getAnalysis<DominatorTree>();
1143
1144  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
1145
1146  bool Changed = false;
1147  SmallVector<Instruction*, 64> Insts;
1148  while (1) {
1149    Allocas.clear();
1150
1151    // Find allocas that are safe to promote, by looking at all instructions in
1152    // the entry node
1153    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
1154      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
1155        if (tryToMakeAllocaBePromotable(AI, TD))
1156          Allocas.push_back(AI);
1157
1158    if (Allocas.empty()) break;
1159
1160    if (HasDomTree)
1161      PromoteMemToReg(Allocas, *DT);
1162    else {
1163      SSAUpdater SSA;
1164      for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
1165        AllocaInst *AI = Allocas[i];
1166
1167        // Build list of instructions to promote.
1168        for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
1169             UI != E; ++UI)
1170          Insts.push_back(cast<Instruction>(*UI));
1171
1172        AllocaPromoter(Insts, SSA).run(AI, Insts);
1173        Insts.clear();
1174      }
1175    }
1176    NumPromoted += Allocas.size();
1177    Changed = true;
1178  }
1179
1180  return Changed;
1181}
1182
1183
1184/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
1185/// SROA.  It must be a struct or array type with a small number of elements.
1186static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
1187  const Type *T = AI->getAllocatedType();
1188  // Do not promote any struct into more than 32 separate vars.
1189  if (const StructType *ST = dyn_cast<StructType>(T))
1190    return ST->getNumElements() <= 32;
1191  // Arrays are much less likely to be safe for SROA; only consider
1192  // them if they are very small.
1193  if (const ArrayType *AT = dyn_cast<ArrayType>(T))
1194    return AT->getNumElements() <= 8;
1195  return false;
1196}
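
// For example, an alloca of { i32, i8*, [4 x i16] } or of [8 x float] is
// considered, while an alloca of [256 x i8] is not.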
1197
1198
1199// performScalarRepl - This algorithm is a simple worklist driven algorithm,
1200// which runs on all of the alloca instructions in the entry block of the
1201// function, removing them if they can be split up or converted to scalars.
1202//
1203bool SROA::performScalarRepl(Function &F) {
1204  std::vector<AllocaInst*> WorkList;
1205
1206  // Scan the entry basic block, adding allocas to the worklist.
1207  BasicBlock &BB = F.getEntryBlock();
1208  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
1209    if (AllocaInst *A = dyn_cast<AllocaInst>(I))
1210      WorkList.push_back(A);
1211
1212  // Process the worklist
1213  bool Changed = false;
1214  while (!WorkList.empty()) {
1215    AllocaInst *AI = WorkList.back();
1216    WorkList.pop_back();
1217
1218    // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
1219    // with unused elements.
1220    if (AI->use_empty()) {
1221      AI->eraseFromParent();
1222      Changed = true;
1223      continue;
1224    }
1225
1226    // If this alloca is impossible for us to promote, reject it early.
1227    if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
1228      continue;
1229
1230    // Check to see if this allocation is only modified by a memcpy/memmove from
1231    // a constant global.  If this is the case, we can change all users to use
1232    // the constant global instead.  This is commonly produced by the CFE by
1233    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
1234    // is only subsequently read.
1235    if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
1236      DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
1237      DEBUG(dbgs() << "  memcpy = " << *TheCopy << '\n');
1238      Constant *TheSrc = cast<Constant>(TheCopy->getSource());
1239      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
1240      TheCopy->eraseFromParent();  // Don't mutate the global.
1241      AI->eraseFromParent();
1242      ++NumGlobals;
1243      Changed = true;
1244      continue;
1245    }
1246
1247    // Check to see if we can perform the core SROA transformation.  We cannot
1248    // transform the allocation instruction if it is an array allocation
1249    // (allocations OF arrays are ok though), and an allocation of a scalar
1250    // value cannot be decomposed at all.
1251    uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
1252
1253    // Do not promote [0 x %struct].
1254    if (AllocaSize == 0) continue;
1255
1256    // Do not promote any struct whose size is too big.
1257    if (AllocaSize > SRThreshold) continue;
1258
1259    // If the alloca looks like a good candidate for scalar replacement, and if
1260    // all its users can be transformed, then split up the aggregate into its
1261    // separate elements.
1262    if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
1263      DoScalarReplacement(AI, WorkList);
1264      Changed = true;
1265      continue;
1266    }
1267
1268    // See if we can turn this aggregate value (potentially with casts) into a
1269    // simple scalar value that can be mem2reg'd into a register value.
1270    // IsNotTrivial tracks whether this is something that mem2reg could have
1271    // promoted itself.  If so, we don't want to transform it needlessly.  Note
1272    // that we can't just check based on the type: the alloca may be of an i32
1273    // but that has pointer arithmetic to set byte 3 of it or something.
1274    if (AllocaInst *NewAI =
1275          ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) {
1276      NewAI->takeName(AI);
1277      AI->eraseFromParent();
1278      ++NumConverted;
1279      Changed = true;
1280      continue;
1281    }
1282
1283    // Otherwise, couldn't process this alloca.
1284  }
1285
1286  return Changed;
1287}
1288
1289/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
1290/// predicate, do SROA now.
1291void SROA::DoScalarReplacement(AllocaInst *AI,
1292                               std::vector<AllocaInst*> &WorkList) {
1293  DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
1294  SmallVector<AllocaInst*, 32> ElementAllocas;
1295  if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
1296    ElementAllocas.reserve(ST->getNumContainedTypes());
1297    for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
1298      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
1299                                      AI->getAlignment(),
1300                                      AI->getName() + "." + Twine(i), AI);
1301      ElementAllocas.push_back(NA);
1302      WorkList.push_back(NA);  // Add to worklist for recursive processing
1303    }
1304  } else {
1305    const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
1306    ElementAllocas.reserve(AT->getNumElements());
1307    const Type *ElTy = AT->getElementType();
1308    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
1309      AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
1310                                      AI->getName() + "." + Twine(i), AI);
1311      ElementAllocas.push_back(NA);
1312      WorkList.push_back(NA);  // Add to worklist for recursive processing
1313    }
1314  }
1315
1316  // Now that we have created the new alloca instructions, rewrite all the
1317  // uses of the old alloca.
1318  RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
1319
1320  // Now erase any instructions that were made dead while rewriting the alloca.
1321  DeleteDeadInstructions();
1322  AI->eraseFromParent();
1323
1324  ++NumReplaced;
1325}
1326
/// DeleteDeadInstructions - Erase instructions on the DeadInsts list,
1328/// recursively including all their operands that become trivially dead.
1329void SROA::DeleteDeadInstructions() {
1330  while (!DeadInsts.empty()) {
1331    Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
1332
1333    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
1334      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
1335        // Zero out the operand and see if it becomes trivially dead.
1336        // (But, don't add allocas to the dead instruction list -- they are
1337        // already on the worklist and will be deleted separately.)
1338        *OI = 0;
1339        if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
1340          DeadInsts.push_back(U);
1341      }
1342
1343    I->eraseFromParent();
1344  }
1345}
1346
1347/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
1348/// performing scalar replacement of alloca AI.  The results are flagged in
1349/// the Info parameter.  Offset indicates the position within AI that is
1350/// referenced by this instruction.
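/// Bitcast and GEP users are followed recursively (GEPs adjust the offset),
/// loads, stores and memory intrinsics are checked with isSafeMemAccess, PHIs
/// and selects are handed to isSafePHISelectUseForScalarRepl, and any other
/// user marks the alloca unsafe.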
1351void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
1352                               AllocaInfo &Info) {
1353  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
1354    Instruction *User = cast<Instruction>(*UI);
1355
1356    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1357      isSafeForScalarRepl(BC, Offset, Info);
1358    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1359      uint64_t GEPOffset = Offset;
1360      isSafeGEP(GEPI, GEPOffset, Info);
1361      if (!Info.isUnsafe)
1362        isSafeForScalarRepl(GEPI, GEPOffset, Info);
1363    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
1364      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
1365      if (Length == 0)
1366        return MarkUnsafe(Info, User);
1367      isSafeMemAccess(Offset, Length->getZExtValue(), 0,
1368                      UI.getOperandNo() == 0, Info, MI,
1369                      true /*AllowWholeAccess*/);
1370    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1371      if (LI->isVolatile())
1372        return MarkUnsafe(Info, User);
1373      const Type *LIType = LI->getType();
1374      isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
1375                      LIType, false, Info, LI, true /*AllowWholeAccess*/);
1376      Info.hasALoadOrStore = true;
1377
1378    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1379      // Store is ok if storing INTO the pointer, not storing the pointer
1380      if (SI->isVolatile() || SI->getOperand(0) == I)
1381        return MarkUnsafe(Info, User);
1382
1383      const Type *SIType = SI->getOperand(0)->getType();
1384      isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
1385                      SIType, true, Info, SI, true /*AllowWholeAccess*/);
1386      Info.hasALoadOrStore = true;
1387    } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
1388      isSafePHISelectUseForScalarRepl(User, Offset, Info);
1389    } else {
1390      return MarkUnsafe(Info, User);
1391    }
1392    if (Info.isUnsafe) return;
1393  }
1394}
1395
1396
/// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a
/// pointer derived from the alloca, we can often still split the alloca into
/// elements.  This is useful if we have a large alloca where one element is
/// phi'd together somewhere: we can SRoA and promote all the other elements
/// even if we end up not being able to promote this one.
1402///
1403/// All we require is that the uses of the PHI do not index into other parts of
/// the alloca.  The most important use case for this is single loads and
/// stores that are PHI'd together, which can happen due to code sinking.
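///
/// For example (names illustrative), a pointer to a single element that is
/// only merged by a PHI and then loaded is fine:
///   %p = phi i32* [ %elt, %then ], [ %elt2, %else ]
///   %val = load i32* %p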
1406void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
1407                                           AllocaInfo &Info) {
1408  // If we've already checked this PHI, don't do it again.
1409  if (PHINode *PN = dyn_cast<PHINode>(I))
1410    if (!Info.CheckedPHIs.insert(PN))
1411      return;
1412
1413  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
1414    Instruction *User = cast<Instruction>(*UI);
1415
1416    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1417      isSafePHISelectUseForScalarRepl(BC, Offset, Info);
1418    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1419      // Only allow "bitcast" GEPs for simplicity.  We could generalize this,
1420      // but would have to prove that we're staying inside of an element being
1421      // promoted.
1422      if (!GEPI->hasAllZeroIndices())
1423        return MarkUnsafe(Info, User);
1424      isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
1425    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1426      if (LI->isVolatile())
1427        return MarkUnsafe(Info, User);
1428      const Type *LIType = LI->getType();
1429      isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
1430                      LIType, false, Info, LI, false /*AllowWholeAccess*/);
1431      Info.hasALoadOrStore = true;
1432
1433    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1434      // Store is ok if storing INTO the pointer, not storing the pointer
1435      if (SI->isVolatile() || SI->getOperand(0) == I)
1436        return MarkUnsafe(Info, User);
1437
1438      const Type *SIType = SI->getOperand(0)->getType();
1439      isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
1440                      SIType, true, Info, SI, false /*AllowWholeAccess*/);
1441      Info.hasALoadOrStore = true;
1442    } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
1443      isSafePHISelectUseForScalarRepl(User, Offset, Info);
1444    } else {
1445      return MarkUnsafe(Info, User);
1446    }
1447    if (Info.isUnsafe) return;
1448  }
1449}
1450
1451/// isSafeGEP - Check if a GEP instruction can be handled for scalar
1452/// replacement.  It is safe when all the indices are constant, in-bounds
1453/// references, and when the resulting offset corresponds to an element within
1454/// the alloca type.  The results are flagged in the Info parameter.  Upon
1455/// return, Offset is adjusted as specified by the GEP indices.
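/// For example, "getelementptr [4 x i32]* %a, i32 0, i32 2" adds 8 to Offset
/// (assuming a 4-byte i32), while a GEP with a non-constant array index is
/// marked unsafe.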
1456void SROA::isSafeGEP(GetElementPtrInst *GEPI,
1457                     uint64_t &Offset, AllocaInfo &Info) {
1458  gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
1459  if (GEPIt == E)
1460    return;
1461
1462  // Walk through the GEP type indices, checking the types that this indexes
1463  // into.
1464  for (; GEPIt != E; ++GEPIt) {
    // Ignore struct elements; no extra checking is needed for these.
1466    if ((*GEPIt)->isStructTy())
1467      continue;
1468
1469    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
1470    if (!IdxVal)
1471      return MarkUnsafe(Info, GEPI);
1472  }
1473
1474  // Compute the offset due to this GEP and check if the alloca has a
1475  // component element at that offset.
1476  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
1477  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
1478                                 &Indices[0], Indices.size());
1479  if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
1480    MarkUnsafe(Info, GEPI);
1481}
1482
1483/// isHomogeneousAggregate - Check if type T is a struct or array containing
1484/// elements of the same type (which is always true for arrays).  If so,
1485/// return true with NumElts and EltTy set to the number of elements and the
1486/// element type, respectively.
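/// For example, [4 x float] and { i32, i32, i32 } are homogeneous, but
/// { i32, float } is not.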
1487static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
1488                                   const Type *&EltTy) {
1489  if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
1490    NumElts = AT->getNumElements();
1491    EltTy = (NumElts == 0 ? 0 : AT->getElementType());
1492    return true;
1493  }
1494  if (const StructType *ST = dyn_cast<StructType>(T)) {
1495    NumElts = ST->getNumContainedTypes();
1496    EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
1497    for (unsigned n = 1; n < NumElts; ++n) {
1498      if (ST->getContainedType(n) != EltTy)
1499        return false;
1500    }
1501    return true;
1502  }
1503  return false;
1504}
1505
1506/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
1507/// "homogeneous" aggregates with the same element type and number of elements.
1508static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
1509  if (T1 == T2)
1510    return true;
1511
1512  unsigned NumElts1, NumElts2;
1513  const Type *EltTy1, *EltTy2;
1514  if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
1515      isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
1516      NumElts1 == NumElts2 &&
1517      EltTy1 == EltTy2)
1518    return true;
1519
1520  return false;
1521}
1522
1523/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
1524/// alloca or has an offset and size that corresponds to a component element
1525/// within it.  The offset checked here may have been formed from a GEP with a
1526/// pointer bitcasted to a different type.
1527///
1528/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
1529/// unit.  If false, it only allows accesses known to be in a single element.
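///
/// For example, with "%a = alloca { i32, i32 }" (8 bytes, assuming a 4-byte
/// i32), an 8-byte memcpy at Offset 0 is a whole-alloca access, a 4-byte
/// access at Offset 4 hits a single element, and a 4-byte access at Offset 2
/// straddles two elements and is marked unsafe.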
1530void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
1531                           const Type *MemOpType, bool isStore,
1532                           AllocaInfo &Info, Instruction *TheAccess,
1533                           bool AllowWholeAccess) {
1534  // Check if this is a load/store of the entire alloca.
1535  if (Offset == 0 && AllowWholeAccess &&
1536      MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
1537    // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
1538    // loads/stores (which are essentially the same as the MemIntrinsics with
1539    // regard to copying padding between elements).  But, if an alloca is
1540    // flagged as both a source and destination of such operations, we'll need
1541    // to check later for padding between elements.
1542    if (!MemOpType || MemOpType->isIntegerTy()) {
1543      if (isStore)
1544        Info.isMemCpyDst = true;
1545      else
1546        Info.isMemCpySrc = true;
1547      return;
1548    }
1549    // This is also safe for references using a type that is compatible with
1550    // the type of the alloca, so that loads/stores can be rewritten using
1551    // insertvalue/extractvalue.
1552    if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) {
1553      Info.hasSubelementAccess = true;
1554      return;
1555    }
1556  }
1557  // Check if the offset/size correspond to a component within the alloca type.
1558  const Type *T = Info.AI->getAllocatedType();
1559  if (TypeHasComponent(T, Offset, MemSize)) {
1560    Info.hasSubelementAccess = true;
1561    return;
1562  }
1563
1564  return MarkUnsafe(Info, TheAccess);
1565}
1566
1567/// TypeHasComponent - Return true if T has a component type with the
1568/// specified offset and size.  If Size is zero, do not check the size.
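/// For example, in { i32, [2 x i16] } (assuming a 4-byte i32 and 2-byte i16),
/// Offset 6 with Size 2 matches the second i16, but Offset 2 with Size 4
/// straddles elements and returns false.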
1569bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
1570  const Type *EltTy;
1571  uint64_t EltSize;
1572  if (const StructType *ST = dyn_cast<StructType>(T)) {
1573    const StructLayout *Layout = TD->getStructLayout(ST);
1574    unsigned EltIdx = Layout->getElementContainingOffset(Offset);
1575    EltTy = ST->getContainedType(EltIdx);
1576    EltSize = TD->getTypeAllocSize(EltTy);
1577    Offset -= Layout->getElementOffset(EltIdx);
1578  } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
1579    EltTy = AT->getElementType();
1580    EltSize = TD->getTypeAllocSize(EltTy);
1581    if (Offset >= AT->getNumElements() * EltSize)
1582      return false;
1583    Offset %= EltSize;
1584  } else {
1585    return false;
1586  }
1587  if (Offset == 0 && (Size == 0 || EltSize == Size))
1588    return true;
1589  // Check if the component spans multiple elements.
1590  if (Offset + Size > EltSize)
1591    return false;
1592  return TypeHasComponent(EltTy, Offset, Size);
1593}
1594
1595/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
1596/// the instruction I, which references it, to use the separate elements.
1597/// Offset indicates the position within AI that is referenced by this
1598/// instruction.
1599void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
1600                                SmallVector<AllocaInst*, 32> &NewElts) {
1601  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
1602    Use &TheUse = UI.getUse();
1603    Instruction *User = cast<Instruction>(*UI++);
1604
1605    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1606      RewriteBitCast(BC, AI, Offset, NewElts);
1607      continue;
1608    }
1609
1610    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1611      RewriteGEP(GEPI, AI, Offset, NewElts);
1612      continue;
1613    }
1614
1615    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
1616      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
1617      uint64_t MemSize = Length->getZExtValue();
1618      if (Offset == 0 &&
1619          MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
1620        RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
1621      // Otherwise the intrinsic can only touch a single element and the
1622      // address operand will be updated, so nothing else needs to be done.
1623      continue;
1624    }
1625
1626    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1627      const Type *LIType = LI->getType();
1628
1629      if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
1630        // Replace:
1631        //   %res = load { i32, i32 }* %alloc
1632        // with:
1633        //   %load.0 = load i32* %alloc.0
        //   %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
1635        //   %load.1 = load i32* %alloc.1
1636        //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
1637        // (Also works for arrays instead of structs)
1638        Value *Insert = UndefValue::get(LIType);
1639        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1640          Value *Load = new LoadInst(NewElts[i], "load", LI);
1641          Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
1642        }
1643        LI->replaceAllUsesWith(Insert);
1644        DeadInsts.push_back(LI);
1645      } else if (LIType->isIntegerTy() &&
1646                 TD->getTypeAllocSize(LIType) ==
1647                 TD->getTypeAllocSize(AI->getAllocatedType())) {
1648        // If this is a load of the entire alloca to an integer, rewrite it.
1649        RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
1650      }
1651      continue;
1652    }
1653
1654    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1655      Value *Val = SI->getOperand(0);
1656      const Type *SIType = Val->getType();
1657      if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
1658        // Replace:
1659        //   store { i32, i32 } %val, { i32, i32 }* %alloc
1660        // with:
1661        //   %val.0 = extractvalue { i32, i32 } %val, 0
1662        //   store i32 %val.0, i32* %alloc.0
1663        //   %val.1 = extractvalue { i32, i32 } %val, 1
1664        //   store i32 %val.1, i32* %alloc.1
1665        // (Also works for arrays instead of structs)
1666        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1667          Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
1668          new StoreInst(Extract, NewElts[i], SI);
1669        }
1670        DeadInsts.push_back(SI);
1671      } else if (SIType->isIntegerTy() &&
1672                 TD->getTypeAllocSize(SIType) ==
1673                 TD->getTypeAllocSize(AI->getAllocatedType())) {
1674        // If this is a store of the entire alloca from an integer, rewrite it.
1675        RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
1676      }
1677      continue;
1678    }
1679
1680    if (isa<SelectInst>(User) || isa<PHINode>(User)) {
1681      // If we have a PHI user of the alloca itself (as opposed to a GEP or
1682      // bitcast) we have to rewrite it.  GEP and bitcast uses will be RAUW'd to
1683      // the new pointer.
1684      if (!isa<AllocaInst>(I)) continue;
1685
1686      assert(Offset == 0 && NewElts[0] &&
1687             "Direct alloca use should have a zero offset");
1688
1689      // If we have a use of the alloca, we know the derived uses will be
1690      // utilizing just the first element of the scalarized result.  Insert a
1691      // bitcast of the first alloca before the user as required.
1692      AllocaInst *NewAI = NewElts[0];
1693      BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
1694      NewAI->moveBefore(BCI);
1695      TheUse = BCI;
1696      continue;
1697    }
1698  }
1699}
1700
1701/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
1702/// and recursively continue updating all of its uses.
1703void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
1704                          SmallVector<AllocaInst*, 32> &NewElts) {
1705  RewriteForScalarRepl(BC, AI, Offset, NewElts);
1706  if (BC->getOperand(0) != AI)
1707    return;
1708
1709  // The bitcast references the original alloca.  Replace its uses with
1710  // references to the first new element alloca.
1711  Instruction *Val = NewElts[0];
1712  if (Val->getType() != BC->getDestTy()) {
1713    Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
1714    Val->takeName(BC);
1715  }
1716  BC->replaceAllUsesWith(Val);
1717  DeadInsts.push_back(BC);
1718}
1719
1720/// FindElementAndOffset - Return the index of the element containing Offset
1721/// within the specified type, which must be either a struct or an array.
1722/// Sets T to the type of the element and Offset to the offset within that
1723/// element.  IdxTy is set to the type of the index result to be used in a
1724/// GEP instruction.
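/// For example (assuming a 4-byte i32, 2-byte i16 and 8-byte-aligned i64),
/// T = { i32, i64 } with Offset 12 returns 1 with T = i64, Offset = 4 and an
/// i32 IdxTy, while T = [4 x i16] with Offset 6 returns 3 with Offset = 0 and
/// an i64 IdxTy.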
1725uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
1726                                    const Type *&IdxTy) {
1727  uint64_t Idx = 0;
1728  if (const StructType *ST = dyn_cast<StructType>(T)) {
1729    const StructLayout *Layout = TD->getStructLayout(ST);
1730    Idx = Layout->getElementContainingOffset(Offset);
1731    T = ST->getContainedType(Idx);
1732    Offset -= Layout->getElementOffset(Idx);
1733    IdxTy = Type::getInt32Ty(T->getContext());
1734    return Idx;
1735  }
1736  const ArrayType *AT = cast<ArrayType>(T);
1737  T = AT->getElementType();
1738  uint64_t EltSize = TD->getTypeAllocSize(T);
1739  Idx = Offset / EltSize;
1740  Offset -= Idx * EltSize;
1741  IdxTy = Type::getInt64Ty(T->getContext());
1742  return Idx;
1743}
1744
1745/// RewriteGEP - Check if this GEP instruction moves the pointer across
1746/// elements of the alloca that are being split apart, and if so, rewrite
1747/// the GEP to be relative to the new element.
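/// For example, if "%a = alloca [2 x { i32, i32 }]" has been split into %a.0
/// and %a.1, a GEP of %a with indices (0, 1, 1) is rewritten as an inbounds
/// GEP of %a.1 with indices (0, 1) (assuming a 4-byte i32).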
1748void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
1749                      SmallVector<AllocaInst*, 32> &NewElts) {
1750  uint64_t OldOffset = Offset;
1751  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
1752  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
1753                                 &Indices[0], Indices.size());
1754
1755  RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
1756
1757  const Type *T = AI->getAllocatedType();
1758  const Type *IdxTy;
1759  uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
1760  if (GEPI->getOperand(0) == AI)
1761    OldIdx = ~0ULL; // Force the GEP to be rewritten.
1762
1763  T = AI->getAllocatedType();
1764  uint64_t EltOffset = Offset;
1765  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
1766
1767  // If this GEP does not move the pointer across elements of the alloca
  // being split, then it does not need to be rewritten.
1769  if (Idx == OldIdx)
1770    return;
1771
1772  const Type *i32Ty = Type::getInt32Ty(AI->getContext());
1773  SmallVector<Value*, 8> NewArgs;
1774  NewArgs.push_back(Constant::getNullValue(i32Ty));
1775  while (EltOffset != 0) {
1776    uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
1777    NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
1778  }
1779  Instruction *Val = NewElts[Idx];
1780  if (NewArgs.size() > 1) {
1781    Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
1782                                            NewArgs.end(), "", GEPI);
1783    Val->takeName(GEPI);
1784  }
1785  if (Val->getType() != GEPI->getType())
1786    Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
1787  GEPI->replaceAllUsesWith(Val);
1788  DeadInsts.push_back(GEPI);
1789}
1790
1791/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
1792/// Rewrite it to copy or set the elements of the scalarized memory.
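/// For example, a memcpy covering the whole of a { i32, i32 } alloca becomes a
/// load/store pair per element; elements that are not simple scalars (or a
/// memset of a non-constant byte value) instead get a smaller per-element
/// memcpy/memmove/memset.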
1793void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
1794                                        AllocaInst *AI,
1795                                        SmallVector<AllocaInst*, 32> &NewElts) {
1796  // If this is a memcpy/memmove, construct the other pointer as the
1797  // appropriate type.  The "Other" pointer is the pointer that goes to memory
1798  // that doesn't have anything to do with the alloca that we are promoting. For
1799  // memset, this Value* stays null.
1800  Value *OtherPtr = 0;
1801  unsigned MemAlignment = MI->getAlignment();
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
1803    if (Inst == MTI->getRawDest())
1804      OtherPtr = MTI->getRawSource();
1805    else {
1806      assert(Inst == MTI->getRawSource());
1807      OtherPtr = MTI->getRawDest();
1808    }
1809  }
1810
1811  // If there is an other pointer, we want to convert it to the same pointer
1812  // type as AI has, so we can GEP through it safely.
1813  if (OtherPtr) {
1814    unsigned AddrSpace =
1815      cast<PointerType>(OtherPtr->getType())->getAddressSpace();
1816
1817    // Remove bitcasts and all-zero GEPs from OtherPtr.  This is an
1818    // optimization, but it's also required to detect the corner case where
1819    // both pointer operands are referencing the same memory, and where
    // OtherPtr may be a bitcast or GEP that is currently being rewritten.
    // (This function is only called for mem intrinsics that access the whole
    // aggregate, so non-zero GEPs are not an issue here.)
1823    OtherPtr = OtherPtr->stripPointerCasts();
1824
1825    // Copying the alloca to itself is a no-op: just delete it.
1826    if (OtherPtr == AI || OtherPtr == NewElts[0]) {
1827      // This code will run twice for a no-op memcpy -- once for each operand.
1828      // Put only one reference to MI on the DeadInsts list.
1829      for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
1830             E = DeadInsts.end(); I != E; ++I)
1831        if (*I == MI) return;
1832      DeadInsts.push_back(MI);
1833      return;
1834    }
1835
1836    // If the pointer is not the right type, insert a bitcast to the right
1837    // type.
1838    const Type *NewTy =
1839      PointerType::get(AI->getType()->getElementType(), AddrSpace);
1840
1841    if (OtherPtr->getType() != NewTy)
1842      OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
1843  }
1844
1845  // Process each element of the aggregate.
1846  bool SROADest = MI->getRawDest() == Inst;
1847
1848  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
1849
1850  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1851    // If this is a memcpy/memmove, emit a GEP of the other element address.
1852    Value *OtherElt = 0;
1853    unsigned OtherEltAlign = MemAlignment;
1854
1855    if (OtherPtr) {
1856      Value *Idx[2] = { Zero,
1857                      ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
1858      OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
1859                                              OtherPtr->getName()+"."+Twine(i),
1860                                                   MI);
1861      uint64_t EltOffset;
1862      const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
1863      const Type *OtherTy = OtherPtrTy->getElementType();
1864      if (const StructType *ST = dyn_cast<StructType>(OtherTy)) {
1865        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
1866      } else {
1867        const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
1868        EltOffset = TD->getTypeAllocSize(EltTy)*i;
1869      }
1870
1871      // The alignment of the other pointer is the guaranteed alignment of the
1872      // element, which is affected by both the known alignment of the whole
1873      // mem intrinsic and the alignment of the element.  If the alignment of
      // the memcpy, for example, is 32 but the element is at a 4-byte offset,
      // then the known alignment is just 4 bytes.
1876      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
1877    }
1878
1879    Value *EltPtr = NewElts[i];
1880    const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
1881
1882    // If we got down to a scalar, insert a load or store as appropriate.
1883    if (EltTy->isSingleValueType()) {
1884      if (isa<MemTransferInst>(MI)) {
1885        if (SROADest) {
1886          // From Other to Alloca.
1887          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
1888          new StoreInst(Elt, EltPtr, MI);
1889        } else {
1890          // From Alloca to Other.
1891          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
1892          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
1893        }
1894        continue;
1895      }
1896      assert(isa<MemSetInst>(MI));
1897
1898      // If the stored element is zero (common case), just store a null
1899      // constant.
1900      Constant *StoreVal;
1901      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
1902        if (CI->isZero()) {
1903          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
1904        } else {
1905          // If EltTy is a vector type, get the element type.
1906          const Type *ValTy = EltTy->getScalarType();
1907
1908          // Construct an integer with the right value.
1909          unsigned EltSize = TD->getTypeSizeInBits(ValTy);
1910          APInt OneVal(EltSize, CI->getZExtValue());
1911          APInt TotalVal(OneVal);
1912          // Set each byte.
1913          for (unsigned i = 0; 8*i < EltSize; ++i) {
1914            TotalVal = TotalVal.shl(8);
1915            TotalVal |= OneVal;
1916          }
1917
1918          // Convert the integer value to the appropriate type.
1919          StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
1920          if (ValTy->isPointerTy())
1921            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
1922          else if (ValTy->isFloatingPointTy())
1923            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
1924          assert(StoreVal->getType() == ValTy && "Type mismatch!");
1925
1926          // If the requested value was a vector constant, create it.
1927          if (EltTy != ValTy) {
1928            unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
1929            SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
1930            StoreVal = ConstantVector::get(Elts);
1931          }
1932        }
1933        new StoreInst(StoreVal, EltPtr, MI);
1934        continue;
1935      }
1936      // Otherwise, if we're storing a byte variable, use a memset call for
1937      // this element.
1938    }
1939
1940    unsigned EltSize = TD->getTypeAllocSize(EltTy);
1941
1942    IRBuilder<> Builder(MI);
1943
1944    // Finally, insert the meminst for this element.
1945    if (isa<MemSetInst>(MI)) {
1946      Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
1947                           MI->isVolatile());
1948    } else {
1949      assert(isa<MemTransferInst>(MI));
1950      Value *Dst = SROADest ? EltPtr : OtherElt;  // Dest ptr
1951      Value *Src = SROADest ? OtherElt : EltPtr;  // Src ptr
1952
1953      if (isa<MemCpyInst>(MI))
1954        Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
1955      else
1956        Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
1957    }
1958  }
1959  DeadInsts.push_back(MI);
1960}
1961
1962/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
1963/// overwrites the entire allocation.  Extract out the pieces of the stored
1964/// integer and store them individually.
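///
/// For example, for "%a = alloca { i16, i16 }" split into %a.0 and %a.1, a
/// store of an i32 %val covering the whole alloca becomes, roughly, on a
/// little-endian target:
///   %t0 = trunc i32 %val to i16
///   store i16 %t0, i16* %a.0
///   %s1 = lshr i32 %val, 16
///   %t1 = trunc i32 %s1 to i16
///   store i16 %t1, i16* %a.1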
1965void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
1966                                         SmallVector<AllocaInst*, 32> &NewElts){
1967  // Extract each element out of the integer according to its structure offset
1968  // and store the element value to the individual alloca.
1969  Value *SrcVal = SI->getOperand(0);
1970  const Type *AllocaEltTy = AI->getAllocatedType();
1971  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
1972
1973  IRBuilder<> Builder(SI);
1974
1975  // Handle tail padding by extending the operand
1976  if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
1977    SrcVal = Builder.CreateZExt(SrcVal,
1978                            IntegerType::get(SI->getContext(), AllocaSizeBits));
1979
1980  DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
1981               << '\n');
1982
1983  // There are two forms here: AI could be an array or struct.  Both cases
1984  // have different ways to compute the element offset.
1985  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
1986    const StructLayout *Layout = TD->getStructLayout(EltSTy);
1987
1988    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1989      // Get the number of bits to shift SrcVal to get the value.
1990      const Type *FieldTy = EltSTy->getElementType(i);
1991      uint64_t Shift = Layout->getElementOffsetInBits(i);
1992
1993      if (TD->isBigEndian())
1994        Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
1995
1996      Value *EltVal = SrcVal;
1997      if (Shift) {
1998        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
1999        EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
2000      }
2001
2002      // Truncate down to an integer of the right size.
2003      uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
2004
2005      // Ignore zero sized fields like {}, they obviously contain no data.
2006      if (FieldSizeBits == 0) continue;
2007
2008      if (FieldSizeBits != AllocaSizeBits)
2009        EltVal = Builder.CreateTrunc(EltVal,
2010                             IntegerType::get(SI->getContext(), FieldSizeBits));
2011      Value *DestField = NewElts[i];
2012      if (EltVal->getType() == FieldTy) {
2013        // Storing to an integer field of this size, just do it.
2014      } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
2015        // Bitcast to the right element type (for fp/vector values).
2016        EltVal = Builder.CreateBitCast(EltVal, FieldTy);
2017      } else {
2018        // Otherwise, bitcast the dest pointer (for aggregates).
2019        DestField = Builder.CreateBitCast(DestField,
2020                                     PointerType::getUnqual(EltVal->getType()));
2021      }
2022      new StoreInst(EltVal, DestField, SI);
2023    }
2024
2025  } else {
2026    const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
2027    const Type *ArrayEltTy = ATy->getElementType();
2028    uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
2029    uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
2030
2031    uint64_t Shift;
2032
2033    if (TD->isBigEndian())
2034      Shift = AllocaSizeBits-ElementOffset;
2035    else
2036      Shift = 0;
2037
2038    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2039      // Ignore zero sized fields like {}, they obviously contain no data.
2040      if (ElementSizeBits == 0) continue;
2041
2042      Value *EltVal = SrcVal;
2043      if (Shift) {
2044        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
2045        EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
2046      }
2047
2048      // Truncate down to an integer of the right size.
2049      if (ElementSizeBits != AllocaSizeBits)
2050        EltVal = Builder.CreateTrunc(EltVal,
2051                                     IntegerType::get(SI->getContext(),
2052                                                      ElementSizeBits));
2053      Value *DestField = NewElts[i];
2054      if (EltVal->getType() == ArrayEltTy) {
2055        // Storing to an integer field of this size, just do it.
2056      } else if (ArrayEltTy->isFloatingPointTy() ||
2057                 ArrayEltTy->isVectorTy()) {
2058        // Bitcast to the right element type (for fp/vector values).
2059        EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
2060      } else {
2061        // Otherwise, bitcast the dest pointer (for aggregates).
2062        DestField = Builder.CreateBitCast(DestField,
2063                                     PointerType::getUnqual(EltVal->getType()));
2064      }
2065      new StoreInst(EltVal, DestField, SI);
2066
2067      if (TD->isBigEndian())
2068        Shift -= ElementOffset;
2069      else
2070        Shift += ElementOffset;
2071    }
2072  }
2073
2074  DeadInsts.push_back(SI);
2075}
2076
2077/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
2078/// an integer.  Load the individual pieces to form the aggregate value.
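///
/// For example, for "%a = alloca { i16, i16 }" split into %a.0 and %a.1, a
/// load of an i32 covering the whole alloca becomes, roughly, on a
/// little-endian target:
///   %e0 = load i16* %a.0
///   %z0 = zext i16 %e0 to i32
///   %e1 = load i16* %a.1
///   %z1 = zext i16 %e1 to i32
///   %s1 = shl i32 %z1, 16
///   %res = or i32 %s1, %z0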
2079void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
2080                                        SmallVector<AllocaInst*, 32> &NewElts) {
2081  // Extract each element out of the NewElts according to its structure offset
2082  // and form the result value.
2083  const Type *AllocaEltTy = AI->getAllocatedType();
2084  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
2085
2086  DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
2087               << '\n');
2088
2089  // There are two forms here: AI could be an array or struct.  Both cases
2090  // have different ways to compute the element offset.
2091  const StructLayout *Layout = 0;
2092  uint64_t ArrayEltBitOffset = 0;
2093  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
2094    Layout = TD->getStructLayout(EltSTy);
2095  } else {
2096    const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
2097    ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
2098  }
2099
2100  Value *ResultVal =
2101    Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
2102
2103  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2104    // Load the value from the alloca.  If the NewElt is an aggregate, cast
2105    // the pointer to an integer of the same size before doing the load.
2106    Value *SrcField = NewElts[i];
2107    const Type *FieldTy =
2108      cast<PointerType>(SrcField->getType())->getElementType();
2109    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
2110
2111    // Ignore zero sized fields like {}, they obviously contain no data.
2112    if (FieldSizeBits == 0) continue;
2113
2114    const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
2115                                                     FieldSizeBits);
2116    if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
2117        !FieldTy->isVectorTy())
2118      SrcField = new BitCastInst(SrcField,
2119                                 PointerType::getUnqual(FieldIntTy),
2120                                 "", LI);
2121    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
2122
2123    // If SrcField is a fp or vector of the right size but that isn't an
2124    // integer type, bitcast to an integer so we can shift it.
2125    if (SrcField->getType() != FieldIntTy)
2126      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
2127
2128    // Zero extend the field to be the same size as the final alloca so that
2129    // we can shift and insert it.
2130    if (SrcField->getType() != ResultVal->getType())
2131      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
2132
2133    // Determine the number of bits to shift SrcField.
2134    uint64_t Shift;
2135    if (Layout) // Struct case.
2136      Shift = Layout->getElementOffsetInBits(i);
2137    else  // Array case.
2138      Shift = i*ArrayEltBitOffset;
2139
2140    if (TD->isBigEndian())
2141      Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
2142
2143    if (Shift) {
2144      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
2145      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
2146    }
2147
2148    // Don't create an 'or x, 0' on the first iteration.
2149    if (!isa<Constant>(ResultVal) ||
2150        !cast<Constant>(ResultVal)->isNullValue())
2151      ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
2152    else
2153      ResultVal = SrcField;
2154  }
2155
2156  // Handle tail padding by truncating the result
2157  if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
2158    ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
2159
2160  LI->replaceAllUsesWith(ResultVal);
2161  DeadInsts.push_back(LI);
2162}
2163
2164/// HasPadding - Return true if the specified type has any structure or
2165/// alignment padding in between the elements that would be split apart
2166/// by SROA; return false otherwise.
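/// For example, { i8, i32 } normally has three bytes of padding before the i32
/// (when i32 requires 4-byte alignment), and an array has padding whenever its
/// element's value size is smaller than its allocation size (e.g. x86_fp80 on
/// typical x86 targets).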
2167static bool HasPadding(const Type *Ty, const TargetData &TD) {
2168  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2169    Ty = ATy->getElementType();
2170    return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
2171  }
2172
2173  // SROA currently handles only Arrays and Structs.
2174  const StructType *STy = cast<StructType>(Ty);
2175  const StructLayout *SL = TD.getStructLayout(STy);
2176  unsigned PrevFieldBitOffset = 0;
2177  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
2178    unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
2179
2180    // Check to see if there is any padding between this element and the
2181    // previous one.
2182    if (i) {
2183      unsigned PrevFieldEnd =
2184        PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
2185      if (PrevFieldEnd < FieldBitOffset)
2186        return true;
2187    }
2188    PrevFieldBitOffset = FieldBitOffset;
2189  }
2190  // Check for tail padding.
2191  if (unsigned EltCount = STy->getNumElements()) {
2192    unsigned PrevFieldEnd = PrevFieldBitOffset +
2193      TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
2194    if (PrevFieldEnd < SL->getSizeInBits())
2195      return true;
2196  }
2197  return false;
2198}
2199
/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
/// aggregate can be broken down into elements.  Return false if not, or true
/// if it is safe to perform scalar replacement.
2203bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
2204  // Loop over the use list of the alloca.  We can only transform it if all of
2205  // the users are safe to transform.
2206  AllocaInfo Info(AI);
2207
2208  isSafeForScalarRepl(AI, 0, Info);
2209  if (Info.isUnsafe) {
2210    DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
2211    return false;
2212  }
2213
2214  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
2215  // source and destination, we have to be careful.  In particular, the memcpy
2216  // could be moving around elements that live in structure padding of the LLVM
2217  // types, but may actually be used.  In these cases, we refuse to promote the
2218  // struct.
2219  if (Info.isMemCpySrc && Info.isMemCpyDst &&
2220      HasPadding(AI->getAllocatedType(), *TD))
2221    return false;
2222
2223  // If the alloca never has an access to just *part* of it, but is accessed
2224  // via loads and stores, then we should use ConvertToScalarInfo to promote
2225  // the alloca instead of promoting each piece at a time and inserting fission
2226  // and fusion code.
2227  if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
2228    // If the struct/array just has one element, use basic SRoA.
2229    if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
2230      if (ST->getNumElements() > 1) return false;
2231    } else {
2232      if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
2233        return false;
2234    }
2235  }
2236
2237  return true;
2238}
2239
2240
2241
2242/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
2243/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
2245static bool PointsToConstantGlobal(Value *V) {
2246  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
2247    return GV->isConstant();
2248  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
2249    if (CE->getOpcode() == Instruction::BitCast ||
2250        CE->getOpcode() == Instruction::GetElementPtr)
2251      return PointsToConstantGlobal(CE->getOperand(0));
2252  return false;
2253}
2254
/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer and return false if
/// we see any stores or other unknown uses.  If we see pointer arithmetic,
/// keep track of whether it moves the pointer (with isOffset) but otherwise
/// traverse the uses.  If we see a memcpy/memmove that targets an unoffset
/// pointer to the alloca, and if the source pointer is a pointer to a constant
/// global, we can optimize this.
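///
/// For example (names illustrative), if "%a = alloca %T" is written only by a
/// single "memcpy(%a, @cst_global, size)" and is otherwise just read, TheCopy
/// is set to that memcpy so the caller can substitute the constant global for
/// the alloca.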
2262static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
2263                                           bool isOffset) {
2264  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
2265    User *U = cast<Instruction>(*UI);
2266
2267    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Ignore non-volatile loads; they are always ok.
2269      if (LI->isVolatile()) return false;
2270      continue;
2271    }
2272
2273    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
2274      // If uses of the bitcast are ok, we are ok.
2275      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
2276        return false;
2277      continue;
2278    }
2279    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
2280      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
2281      // doesn't, it does.
2282      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
2283                                         isOffset || !GEP->hasAllZeroIndices()))
2284        return false;
2285      continue;
2286    }
2287
2288    if (CallSite CS = U) {
2289      // If this is a readonly/readnone call site, then we know it is just a
2290      // load and we can ignore it.
2291      if (CS.onlyReadsMemory())
2292        continue;
2293
2294      // If this is the function being called then we treat it like a load and
2295      // ignore it.
2296      if (CS.isCallee(UI))
2297        continue;
2298
2299      // If this is being passed as a byval argument, the caller is making a
2300      // copy, so it is only a read of the alloca.
2301      unsigned ArgNo = CS.getArgumentNo(UI);
2302      if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
2303        continue;
2304    }
2305
    // If this isn't our memcpy/memmove, reject it as something we can't
2307    // handle.
2308    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
2309    if (MI == 0)
2310      return false;
2311
2312    // If the transfer is using the alloca as a source of the transfer, then
2313    // ignore it since it is a load (unless the transfer is volatile).
2314    if (UI.getOperandNo() == 1) {
2315      if (MI->isVolatile()) return false;
2316      continue;
2317    }
2318
2319    // If we already have seen a copy, reject the second one.
2320    if (TheCopy) return false;
2321
2322    // If the pointer has been offset from the start of the alloca, we can't
2323    // safely handle this.
2324    if (isOffset) return false;
2325
2326    // If the memintrinsic isn't using the alloca as the dest, reject it.
2327    if (UI.getOperandNo() != 0) return false;
2328
2329    // If the source of the memcpy/move is not a constant global, reject it.
2330    if (!PointsToConstantGlobal(MI->getSource()))
2331      return false;
2332
2333    // Otherwise, the transform is safe.  Remember the copy instruction.
2334    TheCopy = MI;
2335  }
2336  return true;
2337}
2338
2339/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
2340/// modified by a copy from a constant global.  If we can prove this, we can
2341/// replace any uses of the alloca with uses of the global directly.
2342MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
2343  MemTransferInst *TheCopy = 0;
2344  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
2345    return TheCopy;
2346  return 0;
2347}
2348