ScalarReplAggregates.cpp revision 90c579de5a383cee278acc3f7e7b9d0a656e6a35
1//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This transformation implements the well-known scalar replacement of
11// aggregates transformation.  This xform breaks up alloca instructions of
12// aggregate type (structure or array) into individual alloca instructions for
13// each member (if possible).  Then, if possible, it transforms the individual
14// alloca instructions into nice clean scalar SSA form.
15//
16// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
17// they often interact, especially for C++ programs.  As such, iterating
18// between SRoA and Mem2Reg until we run out of things to promote works well.
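//
// For example (an illustrative sketch, not a testcase from this revision), an
// alloca of a small struct such as
//
//   %p = alloca { i32, float }
//
// is first broken up into one alloca per member:
//
//   %p.0 = alloca i32
//   %p.1 = alloca float
//
// Each piece whose remaining uses are simple loads and stores is then promoted
// into SSA values by the mem2reg machinery.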
19//
20//===----------------------------------------------------------------------===//
21
22#define DEBUG_TYPE "scalarrepl"
23#include "llvm/Transforms/Scalar.h"
24#include "llvm/Constants.h"
25#include "llvm/DerivedTypes.h"
26#include "llvm/Function.h"
27#include "llvm/GlobalVariable.h"
28#include "llvm/Instructions.h"
29#include "llvm/IntrinsicInst.h"
30#include "llvm/LLVMContext.h"
31#include "llvm/Pass.h"
32#include "llvm/Analysis/Dominators.h"
33#include "llvm/Target/TargetData.h"
34#include "llvm/Transforms/Utils/PromoteMemToReg.h"
35#include "llvm/Transforms/Utils/Local.h"
36#include "llvm/Support/Debug.h"
37#include "llvm/Support/ErrorHandling.h"
38#include "llvm/Support/GetElementPtrTypeIterator.h"
39#include "llvm/Support/IRBuilder.h"
40#include "llvm/Support/MathExtras.h"
41#include "llvm/Support/raw_ostream.h"
42#include "llvm/ADT/SmallVector.h"
43#include "llvm/ADT/Statistic.h"
44using namespace llvm;
45
46STATISTIC(NumReplaced,  "Number of allocas broken up");
47STATISTIC(NumPromoted,  "Number of allocas promoted");
48STATISTIC(NumConverted, "Number of aggregates converted to scalar");
49STATISTIC(NumGlobals,   "Number of allocas copied from constant global");
50
51namespace {
52  struct SROA : public FunctionPass {
53    static char ID; // Pass identification, replacement for typeid
54    explicit SROA(signed T = -1) : FunctionPass(ID) {
55      if (T == -1)
56        SRThreshold = 128;
57      else
58        SRThreshold = T;
59    }
60
61    bool runOnFunction(Function &F);
62
63    bool performScalarRepl(Function &F);
64    bool performPromotion(Function &F);
65
66    // getAnalysisUsage - This pass requires dominance information for the
67    // mem2reg promotion step, and it will not alter the CFG, so say so.
68    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
69      AU.addRequired<DominatorTree>();
70      AU.addRequired<DominanceFrontier>();
71      AU.setPreservesCFG();
72    }
73
74  private:
75    TargetData *TD;
76
77    /// DeadInsts - Keep track of instructions we have made dead, so that
78    /// we can remove them after we are done working.
79    SmallVector<Value*, 32> DeadInsts;
80
81    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
82    /// information about the uses.  All these fields are initialized to false
83    /// and set to true when something is learned.
84    struct AllocaInfo {
85      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
86      bool isUnsafe : 1;
87
88      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
89      bool isMemCpySrc : 1;
90
91      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
92      bool isMemCpyDst : 1;
93
94      AllocaInfo()
95        : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {}
96    };
97
98    unsigned SRThreshold;
99
100    void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
101
102    bool isSafeAllocaToScalarRepl(AllocaInst *AI);
103
104    void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
105                             AllocaInfo &Info);
106    void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
107                   AllocaInfo &Info);
108    void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
109                         const Type *MemOpType, bool isStore, AllocaInfo &Info);
110    bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
111    uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
112                                  const Type *&IdxTy);
113
114    void DoScalarReplacement(AllocaInst *AI,
115                             std::vector<AllocaInst*> &WorkList);
116    void DeleteDeadInstructions();
117    AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);
118
119    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
120                              SmallVector<AllocaInst*, 32> &NewElts);
121    void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
122                        SmallVector<AllocaInst*, 32> &NewElts);
123    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
124                    SmallVector<AllocaInst*, 32> &NewElts);
125    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
126                                      AllocaInst *AI,
127                                      SmallVector<AllocaInst*, 32> &NewElts);
128    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
129                                       SmallVector<AllocaInst*, 32> &NewElts);
130    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
131                                      SmallVector<AllocaInst*, 32> &NewElts);
132
133    static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
134  };
135}
136
137char SROA::ID = 0;
138INITIALIZE_PASS(SROA, "scalarrepl",
139                "Scalar Replacement of Aggregates", false, false);
140
141// Public interface to the ScalarReplAggregates pass
142FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
143  return new SROA(Threshold);
144}
145
146
147//===----------------------------------------------------------------------===//
148// Convert To Scalar Optimization.
149//===----------------------------------------------------------------------===//
150
151namespace {
152/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
153/// optimization, which scans the uses of an alloca and determines if it can
154/// rewrite it in terms of a single new alloca that can be mem2reg'd.
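///
/// For example (illustrative only), an i64 alloca that is written through an
/// i32* bitcast:
///
///   %a = alloca i64
///   %lo = bitcast i64* %a to i32*
///   store i32 %x, i32* %lo
///
/// can be rewritten so the store becomes integer shift/mask/or arithmetic on a
/// single i64 value, which mem2reg can then promote.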
155class ConvertToScalarInfo {
156  /// AllocaSize - The size of the alloca being considered.
157  unsigned AllocaSize;
158  const TargetData &TD;
159
160  /// IsNotTrivial - This is set to true if there is some access to the object
161  /// which means that mem2reg can't promote it.
162  bool IsNotTrivial;
163
164  /// VectorTy - This tracks the vector type that we should promote the alloca
165  /// to if it is possible to turn it into a vector.  This starts out null, and
166  /// if it isn't possible to turn it into a vector type, it gets set to VoidTy.
167  const Type *VectorTy;
168
169  /// HadAVector - True if there is at least one vector access to the alloca.
170  /// We don't want to turn random arrays into vectors and use vector element
171  /// insert/extract, but if there are element accesses to something that is
172  /// also declared as a vector, we do want to promote to a vector.
173  bool HadAVector;
174
175public:
176  explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
177    : AllocaSize(Size), TD(td) {
178    IsNotTrivial = false;
179    VectorTy = 0;
180    HadAVector = false;
181  }
182
183  AllocaInst *TryConvert(AllocaInst *AI);
184
185private:
186  bool CanConvertToScalar(Value *V, uint64_t Offset);
187  void MergeInType(const Type *In, uint64_t Offset);
188  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
189
190  Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
191                                    uint64_t Offset, IRBuilder<> &Builder);
192  Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
193                                   uint64_t Offset, IRBuilder<> &Builder);
194};
195} // end anonymous namespace.
196
197/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
198/// rewrite it to be a new alloca which is mem2reg'able.  This returns the new
199/// alloca if possible or null if not.
200AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
201  // If we can't convert this scalar, or if mem2reg can trivially do it, bail
202  // out.
203  if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
204    return 0;
205
206  // If we were able to find a vector type that can handle this with
207  // insert/extract elements, and if there was at least one use that had
208  // a vector type, promote this to a vector.  We don't want to promote
209  // random stuff that doesn't use vectors (e.g. <9 x double>) because then
210  // we just get a lot of insert/extracts.  If at least one vector is
211  // involved, then we probably really do have a union of vector/array.
212  const Type *NewTy;
213  if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
214    DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
215          << *VectorTy << '\n');
216    NewTy = VectorTy;  // Use the vector type.
217  } else {
218    DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
219    // Create and insert the integer alloca.
220    NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
221  }
222  AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
223  ConvertUsesToScalar(AI, NewAI, 0);
224  return NewAI;
225}
226
227/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
228/// so far at the offset specified by Offset (which is specified in bytes).
229///
230/// There are two cases we handle here:
231///   1) A union of vector types of the same size and potentially their elements.
232///      Here we turn element accesses into insert/extract element operations.
233///      This promotes a <4 x float> with a store of float to the third element
234///      into a <4 x float> that uses insert element.
235///   2) A fully general blob of memory, which we turn into some (potentially
236///      large) integer type with extract and insert operations where the loads
237///      and stores would mutate the memory.  We mark this by setting VectorTy
238///      to VoidTy.
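///
/// For example (a hypothetical case), with a 16-byte alloca, merging a float
/// access at Offset 8 implies the vector type <4 x float> (the access lands on
/// element 2); a later i16 access at Offset 3 is incompatible with that element
/// size, so VectorTy falls back to VoidTy and the alloca is treated as a blob
/// of integer memory.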
239void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
240  // If we already decided to turn this into a blob of integer memory, there is
241  // nothing to be done.
242  if (VectorTy && VectorTy->isVoidTy())
243    return;
244
245  // If this could be contributing to a vector, analyze it.
246
247  // If the In type is a vector that is the same size as the alloca, see if it
248  // matches the existing VecTy.
249  if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
250    // Remember if we saw a vector type.
251    HadAVector = true;
252
253    if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
254      // If we're storing/loading a vector of the right size, allow it as a
255      // vector.  If this is the first vector we see, remember the type so that
256      // we know the element size.  If this is a subsequent access, ignore it
257      // even if it is a differing type but the same size.  Worst case we can
258      // bitcast the resultant vectors.
259      if (VectorTy == 0)
260        VectorTy = VInTy;
261      return;
262    }
263  } else if (In->isFloatTy() || In->isDoubleTy() ||
264             (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
265              isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
266    // If we're accessing something that could be an element of a vector, see
267    // if the implied vector agrees with what we already have and if Offset is
268    // compatible with it.
269    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
270    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
271        (VectorTy == 0 ||
272         cast<VectorType>(VectorTy)->getElementType()
273               ->getPrimitiveSizeInBits()/8 == EltSize)) {
274      if (VectorTy == 0)
275        VectorTy = VectorType::get(In, AllocaSize/EltSize);
276      return;
277    }
278  }
279
280  // Otherwise, we have a case that we can't handle with an optimized vector
281  // form.  We can still turn this into a large integer.
282  VectorTy = Type::getVoidTy(In->getContext());
283}
284
285/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
286/// its accesses to a single vector type, return true and set VecTy to
287/// the new type.  If we could convert the alloca into a single promotable
288/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
289/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
290/// is the current offset from the base of the alloca being analyzed.
291///
292/// If we see at least one access to the value as a vector type, set the
293/// HadAVector flag.
294bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
295  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
296    Instruction *User = cast<Instruction>(*UI);
297
298    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
299      // Don't break volatile loads.
300      if (LI->isVolatile())
301        return false;
302      MergeInType(LI->getType(), Offset);
303      continue;
304    }
305
306    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
307      // Storing the pointer, not into the value?
308      if (SI->getOperand(0) == V || SI->isVolatile()) return false;
309      MergeInType(SI->getOperand(0)->getType(), Offset);
310      continue;
311    }
312
313    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
314      IsNotTrivial = true;  // Can't be mem2reg'd.
315      if (!CanConvertToScalar(BCI, Offset))
316        return false;
317      continue;
318    }
319
320    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
321      // If this is a GEP with variable indices, we can't handle it.
322      if (!GEP->hasAllConstantIndices())
323        return false;
324
325      // Compute the offset that this GEP adds to the pointer.
326      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
327      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
328                                               &Indices[0], Indices.size());
329      // See if all uses can be converted.
330      if (!CanConvertToScalar(GEP, Offset+GEPOffset))
331        return false;
332      IsNotTrivial = true;  // Can't be mem2reg'd.
333      continue;
334    }
335
336    // If this is a constant sized memset of a constant value (e.g. 0) we can
337    // handle it.
338    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
339      // Store of constant value and constant size.
340      if (!isa<ConstantInt>(MSI->getValue()) ||
341          !isa<ConstantInt>(MSI->getLength()))
342        return false;
343      IsNotTrivial = true;  // Can't be mem2reg'd.
344      continue;
345    }
346
347    // If this is a memcpy or memmove into or out of the whole allocation, we
348    // can handle it like a load or store of the scalar type.
349    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
350      ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
351      if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
352        return false;
353
354      IsNotTrivial = true;  // Can't be mem2reg'd.
355      continue;
356    }
357
358    // Otherwise, we cannot handle this!
359    return false;
360  }
361
362  return true;
363}
364
365/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
366/// directly.  This happens when we are converting an "integer union" to a
367/// single integer scalar, or when we are converting a "vector union" to a
368/// vector with insert/extractelement instructions.
369///
370/// Offset is an offset from the original alloca, in bits that need to be
371/// shifted to the right.  By the end of this, there should be no uses of Ptr.
372void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
373                                              uint64_t Offset) {
374  while (!Ptr->use_empty()) {
375    Instruction *User = cast<Instruction>(Ptr->use_back());
376
377    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
378      ConvertUsesToScalar(CI, NewAI, Offset);
379      CI->eraseFromParent();
380      continue;
381    }
382
383    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
384      // Compute the offset that this GEP adds to the pointer.
385      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
386      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
387                                               &Indices[0], Indices.size());
388      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
389      GEP->eraseFromParent();
390      continue;
391    }
392
393    IRBuilder<> Builder(User->getParent(), User);
394
395    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
396      // The load is a bit extract from NewAI shifted right by Offset bits.
397      Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
398      Value *NewLoadVal
399        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
400      LI->replaceAllUsesWith(NewLoadVal);
401      LI->eraseFromParent();
402      continue;
403    }
404
405    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
406      assert(SI->getOperand(0) != Ptr && "Consistency error!");
407      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
408      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
409                                             Builder);
410      Builder.CreateStore(New, NewAI);
411      SI->eraseFromParent();
412
413      // If the load we just inserted is now dead, then the inserted store
414      // overwrote the entire thing.
415      if (Old->use_empty())
416        Old->eraseFromParent();
417      continue;
418    }
419
420    // If this is a constant sized memset of a constant value (e.g. 0) we can
421    // transform it into a store of the expanded constant value.
422    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
423      assert(MSI->getRawDest() == Ptr && "Consistency error!");
424      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
425      if (NumBytes != 0) {
426        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
427
428        // Compute the value replicated the right number of times.
429        APInt APVal(NumBytes*8, Val);
430
431        // Splat the value if non-zero.
432        if (Val)
433          for (unsigned i = 1; i != NumBytes; ++i)
434            APVal |= APVal << 8;
435
436        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
437        Value *New = ConvertScalar_InsertValue(
438                                    ConstantInt::get(User->getContext(), APVal),
439                                               Old, Offset, Builder);
440        Builder.CreateStore(New, NewAI);
441
442        // If the load we just inserted is now dead, then the memset overwrote
443        // the entire thing.
444        if (Old->use_empty())
445          Old->eraseFromParent();
446      }
447      MSI->eraseFromParent();
448      continue;
449    }
450
451    // If this is a memcpy or memmove into or out of the whole allocation, we
452    // can handle it like a load or store of the scalar type.
453    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
454      assert(Offset == 0 && "must be store to start of alloca");
455
456      // If the source and destination both refer to the same alloca, then this
457      // is a noop copy-to-self, just delete it.  Otherwise, emit a load and store
458      // as appropriate.
459      AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0));
460
461      if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) {
462        // Dest must be OrigAI, change this to be a load from the original
463        // pointer (bitcasted), then a store to our new alloca.
464        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
465        Value *SrcPtr = MTI->getSource();
466        SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
467
468        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
469        SrcVal->setAlignment(MTI->getAlignment());
470        Builder.CreateStore(SrcVal, NewAI);
471      } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) {
472        // Src must be OrigAI, change this to be a load from NewAI then a store
473        // through the original dest pointer (bitcasted).
474        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
475        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
476
477        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
478        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
479        NewStore->setAlignment(MTI->getAlignment());
480      } else {
481        // Noop transfer. Src == Dst
482      }
483
484      MTI->eraseFromParent();
485      continue;
486    }
487
488    llvm_unreachable("Unsupported operation!");
489  }
490}
491
492/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
493/// or vector value FromVal, extracting the bits from the offset specified by
494/// Offset.  This returns the value, which is of type ToType.
495///
496/// This happens when we are converting an "integer union" to a single
497/// integer scalar, or when we are converting a "vector union" to a vector with
498/// insert/extractelement instructions.
499///
500/// Offset is an offset from the original alloca, in bits that need to be
501/// shifted to the right.
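///
/// For example (illustrative, little-endian): extracting an i16 at bit offset
/// 16 from an i64 value %v is emitted roughly as
///
///   %s = lshr i64 %v, 16
///   %t = trunc i64 %s to i16
///
/// with a final bitcast or inttoptr if the requested type is a same-sized
/// float or pointer rather than an integer.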
502Value *ConvertToScalarInfo::
503ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
504                           uint64_t Offset, IRBuilder<> &Builder) {
505  // If the load is of the whole new alloca, no conversion is needed.
506  if (FromVal->getType() == ToType && Offset == 0)
507    return FromVal;
508
509  // If the result alloca is a vector type, this is either an element
510  // access or a bitcast to another vector type of the same size.
511  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
512    if (ToType->isVectorTy())
513      return Builder.CreateBitCast(FromVal, ToType, "tmp");
514
515    // Otherwise it must be an element access.
516    unsigned Elt = 0;
517    if (Offset) {
518      unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
519      Elt = Offset/EltSize;
520      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
521    }
522    // Return the element extracted out of it.
523    Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
524                    Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
525    if (V->getType() != ToType)
526      V = Builder.CreateBitCast(V, ToType, "tmp");
527    return V;
528  }
529
530  // If ToType is a first class aggregate, extract out each of the pieces and
531  // use insertvalue's to form the FCA.
532  if (const StructType *ST = dyn_cast<StructType>(ToType)) {
533    const StructLayout &Layout = *TD.getStructLayout(ST);
534    Value *Res = UndefValue::get(ST);
535    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
536      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
537                                        Offset+Layout.getElementOffsetInBits(i),
538                                              Builder);
539      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
540    }
541    return Res;
542  }
543
544  if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
545    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
546    Value *Res = UndefValue::get(AT);
547    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
548      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
549                                              Offset+i*EltSize, Builder);
550      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
551    }
552    return Res;
553  }
554
555  // Otherwise, this must be a union that was converted to an integer value.
556  const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
557
558  // If this is a big-endian system and the load is narrower than the
559  // full alloca type, we need to do a shift to get the right bits.
560  int ShAmt = 0;
561  if (TD.isBigEndian()) {
562    // On big-endian machines, the lowest bit is stored at the bit offset
563    // from the pointer given by getTypeStoreSizeInBits.  This matters for
564    // integers with a bitwidth that is not a multiple of 8.
565    ShAmt = TD.getTypeStoreSizeInBits(NTy) -
566            TD.getTypeStoreSizeInBits(ToType) - Offset;
567  } else {
568    ShAmt = Offset;
569  }
570
571  // Note: we support negative shift amounts (handled with shl), which are not
572  // defined.  We do this to support (f.e.) loads off the end of a structure
573  // where only some bits are used.
574  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
575    FromVal = Builder.CreateLShr(FromVal,
576                                 ConstantInt::get(FromVal->getType(),
577                                                           ShAmt), "tmp");
578  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
579    FromVal = Builder.CreateShl(FromVal,
580                                ConstantInt::get(FromVal->getType(),
581                                                          -ShAmt), "tmp");
582
583  // Finally, unconditionally truncate the integer to the right width.
584  unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
585  if (LIBitWidth < NTy->getBitWidth())
586    FromVal =
587      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
588                                                    LIBitWidth), "tmp");
589  else if (LIBitWidth > NTy->getBitWidth())
590    FromVal =
591       Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
592                                                    LIBitWidth), "tmp");
593
594  // If the result is an integer, this is a trunc or bitcast.
595  if (ToType->isIntegerTy()) {
596    // Should be done.
597  } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
598    // Just do a bitcast, we know the sizes match up.
599    FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
600  } else {
601    // Otherwise must be a pointer.
602    FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
603  }
604  assert(FromVal->getType() == ToType && "Didn't convert right?");
605  return FromVal;
606}
607
608/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
609/// or vector value "Old" at the offset specified by Offset.
610///
611/// This happens when we are converting an "integer union" to a
612/// single integer scalar, or when we are converting a "vector union" to a
613/// vector with insert/extractelement instructions.
614///
615/// Offset is an offset from the original alloca, in bits that need to be
616/// shifted to the right.
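///
/// For example (illustrative, little-endian): inserting an i8 value %b at bit
/// offset 8 into an existing i32 value %old is emitted roughly as
///
///   %z = zext i8 %b to i32
///   %s = shl i32 %z, 8
///   %m = and i32 %old, -65281        ; clear bits 8-15 (~(0xFF << 8))
///   %r = or i32 %m, %s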
617Value *ConvertToScalarInfo::
618ConvertScalar_InsertValue(Value *SV, Value *Old,
619                          uint64_t Offset, IRBuilder<> &Builder) {
620  // Convert the stored type to the actual type, shift it left to insert
621  // then 'or' into place.
622  const Type *AllocaType = Old->getType();
623  LLVMContext &Context = Old->getContext();
624
625  if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
626    uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
627    uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
628
629    // Changing the whole vector with memset or with an access of a different
630    // vector type?
631    if (ValSize == VecSize)
632      return Builder.CreateBitCast(SV, AllocaType, "tmp");
633
634    uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
635
636    // Must be an element insertion.
637    unsigned Elt = Offset/EltSize;
638
639    if (SV->getType() != VTy->getElementType())
640      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
641
642    SV = Builder.CreateInsertElement(Old, SV,
643                     ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
644                                     "tmp");
645    return SV;
646  }
647
648  // If SV is a first-class aggregate value, insert each value recursively.
649  if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
650    const StructLayout &Layout = *TD.getStructLayout(ST);
651    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
652      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
653      Old = ConvertScalar_InsertValue(Elt, Old,
654                                      Offset+Layout.getElementOffsetInBits(i),
655                                      Builder);
656    }
657    return Old;
658  }
659
660  if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
661    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
662    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
663      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
664      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
665    }
666    return Old;
667  }
668
669  // If SV is a float, convert it to the appropriate integer type.
670  // If it is a pointer, do the same.
671  unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
672  unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
673  unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
674  unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
675  if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
676    SV = Builder.CreateBitCast(SV,
677                            IntegerType::get(SV->getContext(),SrcWidth), "tmp");
678  else if (SV->getType()->isPointerTy())
679    SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");
680
681  // Zero extend or truncate the value if needed.
682  if (SV->getType() != AllocaType) {
683    if (SV->getType()->getPrimitiveSizeInBits() <
684             AllocaType->getPrimitiveSizeInBits())
685      SV = Builder.CreateZExt(SV, AllocaType, "tmp");
686    else {
687      // Truncation may be needed if storing more than the alloca can hold
688      // (undefined behavior).
689      SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
690      SrcWidth = DestWidth;
691      SrcStoreWidth = DestStoreWidth;
692    }
693  }
694
695  // If this is a big-endian system and the store is narrower than the
696  // full alloca type, we need to do a shift to get the right bits.
697  int ShAmt = 0;
698  if (TD.isBigEndian()) {
699    // On big-endian machines, the lowest bit is stored at the bit offset
700    // from the pointer given by getTypeStoreSizeInBits.  This matters for
701    // integers with a bitwidth that is not a multiple of 8.
702    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
703  } else {
704    ShAmt = Offset;
705  }
706
707  // Note: we support negative shift amounts (handled with shr), which are not
708  // defined.  We do this to support (f.e.) stores off the end of a structure
709  // where only some bits in the structure are set.
710  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
711  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
712    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
713                           ShAmt), "tmp");
714    Mask <<= ShAmt;
715  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
716    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
717                            -ShAmt), "tmp");
718    Mask = Mask.lshr(-ShAmt);
719  }
720
721  // Mask out the bits we are about to insert from the old value, and or
722  // in the new bits.
723  if (SrcWidth != DestWidth) {
724    assert(DestWidth > SrcWidth);
725    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
726    SV = Builder.CreateOr(Old, SV, "ins");
727  }
728  return SV;
729}
730
731
732//===----------------------------------------------------------------------===//
733// SRoA Driver
734//===----------------------------------------------------------------------===//
735
736
737bool SROA::runOnFunction(Function &F) {
738  TD = getAnalysisIfAvailable<TargetData>();
739
740  bool Changed = performPromotion(F);
741
742  // FIXME: ScalarRepl currently depends on TargetData more than it
743  // theoretically needs to. It should be refactored in order to support
744  // target-independent IR. Until this is done, just skip the actual
745  // scalar-replacement portion of this pass.
746  if (!TD) return Changed;
747
748  while (1) {
749    bool LocalChange = performScalarRepl(F);
750    if (!LocalChange) break;   // No need to repromote if no scalarrepl
751    Changed = true;
752    LocalChange = performPromotion(F);
753    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
754  }
755
756  return Changed;
757}
758
759
760bool SROA::performPromotion(Function &F) {
761  std::vector<AllocaInst*> Allocas;
762  DominatorTree         &DT = getAnalysis<DominatorTree>();
763  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
764
765  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
766
767  bool Changed = false;
768
769  while (1) {
770    Allocas.clear();
771
772    // Find allocas that are safe to promote, by looking at all instructions in
773    // the entry node
774    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
775      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
776        if (isAllocaPromotable(AI))
777          Allocas.push_back(AI);
778
779    if (Allocas.empty()) break;
780
781    PromoteMemToReg(Allocas, DT, DF);
782    NumPromoted += Allocas.size();
783    Changed = true;
784  }
785
786  return Changed;
787}
788
789
790/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
791/// SROA.  It must be a struct or array type with a small number of elements.
792static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
793  const Type *T = AI->getAllocatedType();
794  // Do not promote any struct into more than 32 separate vars.
795  if (const StructType *ST = dyn_cast<StructType>(T))
796    return ST->getNumElements() <= 32;
797  // Arrays are much less likely to be safe for SROA; only consider
798  // them if they are very small.
799  if (const ArrayType *AT = dyn_cast<ArrayType>(T))
800    return AT->getNumElements() <= 8;
801  return false;
802}
803
804
805// performScalarRepl - This is a simple worklist-driven algorithm that runs
806// over all of the alloca instructions in the function, removing them if they
807// are only used by getelementptr instructions.
808//
809bool SROA::performScalarRepl(Function &F) {
810  std::vector<AllocaInst*> WorkList;
811
812  // Scan the entry basic block, adding allocas to the worklist.
813  BasicBlock &BB = F.getEntryBlock();
814  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
815    if (AllocaInst *A = dyn_cast<AllocaInst>(I))
816      WorkList.push_back(A);
817
818  // Process the worklist
819  bool Changed = false;
820  while (!WorkList.empty()) {
821    AllocaInst *AI = WorkList.back();
822    WorkList.pop_back();
823
824    // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
825    // with unused elements.
826    if (AI->use_empty()) {
827      AI->eraseFromParent();
828      Changed = true;
829      continue;
830    }
831
832    // If this alloca is impossible for us to promote, reject it early.
833    if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
834      continue;
835
836    // Check to see if this allocation is only modified by a memcpy/memmove from
837    // a constant global.  If this is the case, we can change all users to use
838    // the constant global instead.  This is commonly produced by the CFE by
839    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
840    // is only subsequently read.
841    if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
842      DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
843      DEBUG(dbgs() << "  memcpy = " << *TheCopy << '\n');
844      Constant *TheSrc = cast<Constant>(TheCopy->getSource());
845      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
846      TheCopy->eraseFromParent();  // Don't mutate the global.
847      AI->eraseFromParent();
848      ++NumGlobals;
849      Changed = true;
850      continue;
851    }
852
853    // Check to see if we can perform the core SROA transformation.  We cannot
854    // transform the allocation instruction if it is an array allocation
855    // (allocations OF arrays are ok though), and an allocation of a scalar
856    // value cannot be decomposed at all.
857    uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
858
859    // Do not promote [0 x %struct].
860    if (AllocaSize == 0) continue;
861
862    // Do not promote any struct whose size is too big.
863    if (AllocaSize > SRThreshold) continue;
864
865    // If the alloca looks like a good candidate for scalar replacement, and if
866    // all its users can be transformed, then split up the aggregate into its
867    // separate elements.
868    if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
869      DoScalarReplacement(AI, WorkList);
870      Changed = true;
871      continue;
872    }
873
874    // See if we can turn this aggregate value (potentially with casts) into a
875    // simple scalar value that can be mem2reg'd into a register.
876    // IsNotTrivial tracks whether this is something that mem2reg could have
877    // promoted itself.  If so, we don't want to transform it needlessly.  Note
878    // that we can't just check based on the type: the alloca may be of an i32
879    // but that has pointer arithmetic to set byte 3 of it or something.
880    if (AllocaInst *NewAI =
881          ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) {
882      NewAI->takeName(AI);
883      AI->eraseFromParent();
884      ++NumConverted;
885      Changed = true;
886      continue;
887    }
888
889    // Otherwise, couldn't process this alloca.
890  }
891
892  return Changed;
893}
894
895/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
896/// predicate, do SROA now.
897void SROA::DoScalarReplacement(AllocaInst *AI,
898                               std::vector<AllocaInst*> &WorkList) {
899  DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
900  SmallVector<AllocaInst*, 32> ElementAllocas;
901  if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
902    ElementAllocas.reserve(ST->getNumContainedTypes());
903    for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
904      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
905                                      AI->getAlignment(),
906                                      AI->getName() + "." + Twine(i), AI);
907      ElementAllocas.push_back(NA);
908      WorkList.push_back(NA);  // Add to worklist for recursive processing
909    }
910  } else {
911    const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
912    ElementAllocas.reserve(AT->getNumElements());
913    const Type *ElTy = AT->getElementType();
914    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
915      AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
916                                      AI->getName() + "." + Twine(i), AI);
917      ElementAllocas.push_back(NA);
918      WorkList.push_back(NA);  // Add to worklist for recursive processing
919    }
920  }
921
922  // Now that we have created the new alloca instructions, rewrite all the
923  // uses of the old alloca.
924  RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
925
926  // Now erase any instructions that were made dead while rewriting the alloca.
927  DeleteDeadInstructions();
928  AI->eraseFromParent();
929
930  ++NumReplaced;
931}
932
933/// DeleteDeadInstructions - Erase instructions on the DeadInsts list,
934/// recursively including all their operands that become trivially dead.
935void SROA::DeleteDeadInstructions() {
936  while (!DeadInsts.empty()) {
937    Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
938
939    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
940      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
941        // Zero out the operand and see if it becomes trivially dead.
942        // (But, don't add allocas to the dead instruction list -- they are
943        // already on the worklist and will be deleted separately.)
944        *OI = 0;
945        if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
946          DeadInsts.push_back(U);
947      }
948
949    I->eraseFromParent();
950  }
951}
952
953/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
954/// performing scalar replacement of alloca AI.  The results are flagged in
955/// the Info parameter.  Offset indicates the position within AI that is
956/// referenced by this instruction.
957void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
958                               AllocaInfo &Info) {
959  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
960    Instruction *User = cast<Instruction>(*UI);
961
962    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
963      isSafeForScalarRepl(BC, AI, Offset, Info);
964    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
965      uint64_t GEPOffset = Offset;
966      isSafeGEP(GEPI, AI, GEPOffset, Info);
967      if (!Info.isUnsafe)
968        isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
969    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
970      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
971      if (Length)
972        isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
973                        UI.getOperandNo() == 0, Info);
974      else
975        MarkUnsafe(Info);
976    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
977      if (!LI->isVolatile()) {
978        const Type *LIType = LI->getType();
979        isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
980                        LIType, false, Info);
981      } else
982        MarkUnsafe(Info);
983    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
984      // Store is ok if storing INTO the pointer, not storing the pointer
985      if (!SI->isVolatile() && SI->getOperand(0) != I) {
986        const Type *SIType = SI->getOperand(0)->getType();
987        isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
988                        SIType, true, Info);
989      } else
990        MarkUnsafe(Info);
991    } else {
992      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
993      MarkUnsafe(Info);
994    }
995    if (Info.isUnsafe) return;
996  }
997}
998
999/// isSafeGEP - Check if a GEP instruction can be handled for scalar
1000/// replacement.  It is safe when all the indices are constant, in-bounds
1001/// references, and when the resulting offset corresponds to an element within
1002/// the alloca type.  The results are flagged in the Info parameter.  Upon
1003/// return, Offset is adjusted as specified by the GEP indices.
1004void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
1005                     uint64_t &Offset, AllocaInfo &Info) {
1006  gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
1007  if (GEPIt == E)
1008    return;
1009
1010  // Walk through the GEP type indices, checking the types that this indexes
1011  // into.
1012  for (; GEPIt != E; ++GEPIt) {
1013    // Ignore struct elements, no extra checking needed for these.
1014    if ((*GEPIt)->isStructTy())
1015      continue;
1016
1017    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
1018    if (!IdxVal)
1019      return MarkUnsafe(Info);
1020  }
1021
1022  // Compute the offset due to this GEP and check if the alloca has a
1023  // component element at that offset.
1024  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
1025  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
1026                                 &Indices[0], Indices.size());
1027  if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
1028    MarkUnsafe(Info);
1029}
1030
1031/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
1032/// alloca or has an offset and size that corresponds to a component element
1033/// within it.  The offset checked here may have been formed from a GEP with a
1034/// pointer bitcasted to a different type.
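///
/// For example (hypothetical), a memcpy of all 12 bytes of an alloca of type
/// { i32, i32, i32 } is accepted and flagged as isMemCpySrc/isMemCpyDst, while
/// a 6-byte memcpy starting at offset 2 is rejected because it does not line
/// up with any single component.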
1035void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
1036                           const Type *MemOpType, bool isStore,
1037                           AllocaInfo &Info) {
1038  // Check if this is a load/store of the entire alloca.
1039  if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
1040    bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
1041    // This is safe for MemIntrinsics (where MemOpType is 0), integer types
1042    // (which are essentially the same as the MemIntrinsics, especially with
1043    // regard to copying padding between elements), or references using the
1044    // aggregate type of the alloca.
1045    if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
1046      if (!UsesAggregateType) {
1047        if (isStore)
1048          Info.isMemCpyDst = true;
1049        else
1050          Info.isMemCpySrc = true;
1051      }
1052      return;
1053    }
1054  }
1055  // Check if the offset/size correspond to a component within the alloca type.
1056  const Type *T = AI->getAllocatedType();
1057  if (TypeHasComponent(T, Offset, MemSize))
1058    return;
1059
1060  return MarkUnsafe(Info);
1061}
1062
1063/// TypeHasComponent - Return true if T has a component type with the
1064/// specified offset and size.  If Size is zero, do not check the size.
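///
/// For example (hypothetical), for T = { i32, [8 x i16] }, Offset = 12 and
/// Size = 2 this returns true (the i16 at index 4 of the array element), while
/// Offset = 3 returns false because no component starts at that offset.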
1065bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
1066  const Type *EltTy;
1067  uint64_t EltSize;
1068  if (const StructType *ST = dyn_cast<StructType>(T)) {
1069    const StructLayout *Layout = TD->getStructLayout(ST);
1070    unsigned EltIdx = Layout->getElementContainingOffset(Offset);
1071    EltTy = ST->getContainedType(EltIdx);
1072    EltSize = TD->getTypeAllocSize(EltTy);
1073    Offset -= Layout->getElementOffset(EltIdx);
1074  } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
1075    EltTy = AT->getElementType();
1076    EltSize = TD->getTypeAllocSize(EltTy);
1077    if (Offset >= AT->getNumElements() * EltSize)
1078      return false;
1079    Offset %= EltSize;
1080  } else {
1081    return false;
1082  }
1083  if (Offset == 0 && (Size == 0 || EltSize == Size))
1084    return true;
1085  // Check if the component spans multiple elements.
1086  if (Offset + Size > EltSize)
1087    return false;
1088  return TypeHasComponent(EltTy, Offset, Size);
1089}
1090
1091/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
1092/// the instruction I, which references it, to use the separate elements.
1093/// Offset indicates the position within AI that is referenced by this
1094/// instruction.
1095void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
1096                                SmallVector<AllocaInst*, 32> &NewElts) {
1097  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
1098    Instruction *User = cast<Instruction>(*UI);
1099
1100    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1101      RewriteBitCast(BC, AI, Offset, NewElts);
1102    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1103      RewriteGEP(GEPI, AI, Offset, NewElts);
1104    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
1105      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
1106      uint64_t MemSize = Length->getZExtValue();
1107      if (Offset == 0 &&
1108          MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
1109        RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
1110      // Otherwise the intrinsic can only touch a single element and the
1111      // address operand will be updated, so nothing else needs to be done.
1112    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1113      const Type *LIType = LI->getType();
1114      if (LIType == AI->getAllocatedType()) {
1115        // Replace:
1116        //   %res = load { i32, i32 }* %alloc
1117        // with:
1118        //   %load.0 = load i32* %alloc.0
1119        //   %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
1120        //   %load.1 = load i32* %alloc.1
1121        //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
1122        // (Also works for arrays instead of structs)
1123        Value *Insert = UndefValue::get(LIType);
1124        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1125          Value *Load = new LoadInst(NewElts[i], "load", LI);
1126          Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
1127        }
1128        LI->replaceAllUsesWith(Insert);
1129        DeadInsts.push_back(LI);
1130      } else if (LIType->isIntegerTy() &&
1131                 TD->getTypeAllocSize(LIType) ==
1132                 TD->getTypeAllocSize(AI->getAllocatedType())) {
1133        // If this is a load of the entire alloca to an integer, rewrite it.
1134        RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
1135      }
1136    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1137      Value *Val = SI->getOperand(0);
1138      const Type *SIType = Val->getType();
1139      if (SIType == AI->getAllocatedType()) {
1140        // Replace:
1141        //   store { i32, i32 } %val, { i32, i32 }* %alloc
1142        // with:
1143        //   %val.0 = extractvalue { i32, i32 } %val, 0
1144        //   store i32 %val.0, i32* %alloc.0
1145        //   %val.1 = extractvalue { i32, i32 } %val, 1
1146        //   store i32 %val.1, i32* %alloc.1
1147        // (Also works for arrays instead of structs)
1148        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1149          Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
1150          new StoreInst(Extract, NewElts[i], SI);
1151        }
1152        DeadInsts.push_back(SI);
1153      } else if (SIType->isIntegerTy() &&
1154                 TD->getTypeAllocSize(SIType) ==
1155                 TD->getTypeAllocSize(AI->getAllocatedType())) {
1156        // If this is a store of the entire alloca from an integer, rewrite it.
1157        RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
1158      }
1159    }
1160  }
1161}
1162
1163/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
1164/// and recursively continue updating all of its uses.
1165void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
1166                          SmallVector<AllocaInst*, 32> &NewElts) {
1167  RewriteForScalarRepl(BC, AI, Offset, NewElts);
1168  if (BC->getOperand(0) != AI)
1169    return;
1170
1171  // The bitcast references the original alloca.  Replace its uses with
1172  // references to the first new element alloca.
1173  Instruction *Val = NewElts[0];
1174  if (Val->getType() != BC->getDestTy()) {
1175    Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
1176    Val->takeName(BC);
1177  }
1178  BC->replaceAllUsesWith(Val);
1179  DeadInsts.push_back(BC);
1180}
1181
1182/// FindElementAndOffset - Return the index of the element containing Offset
1183/// within the specified type, which must be either a struct or an array.
1184/// Sets T to the type of the element and Offset to the offset within that
1185/// element.  IdxTy is set to the type of the index result to be used in a
1186/// GEP instruction.
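///
/// For example (hypothetical), for T = { i32, [4 x i16] } and Offset = 6 this
/// returns index 1 with T set to [4 x i16], Offset set to 2, and IdxTy set to
/// i32; calling it again on that array type returns index 1 with Offset 0 and
/// IdxTy i64.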
1187uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
1188                                    const Type *&IdxTy) {
1189  uint64_t Idx = 0;
1190  if (const StructType *ST = dyn_cast<StructType>(T)) {
1191    const StructLayout *Layout = TD->getStructLayout(ST);
1192    Idx = Layout->getElementContainingOffset(Offset);
1193    T = ST->getContainedType(Idx);
1194    Offset -= Layout->getElementOffset(Idx);
1195    IdxTy = Type::getInt32Ty(T->getContext());
1196    return Idx;
1197  }
1198  const ArrayType *AT = cast<ArrayType>(T);
1199  T = AT->getElementType();
1200  uint64_t EltSize = TD->getTypeAllocSize(T);
1201  Idx = Offset / EltSize;
1202  Offset -= Idx * EltSize;
1203  IdxTy = Type::getInt64Ty(T->getContext());
1204  return Idx;
1205}
1206
1207/// RewriteGEP - Check if this GEP instruction moves the pointer across
1208/// elements of the alloca that are being split apart, and if so, rewrite
1209/// the GEP to be relative to the new element.
1210void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
1211                      SmallVector<AllocaInst*, 32> &NewElts) {
1212  uint64_t OldOffset = Offset;
1213  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
1214  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
1215                                 &Indices[0], Indices.size());
1216
1217  RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
1218
1219  const Type *T = AI->getAllocatedType();
1220  const Type *IdxTy;
1221  uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
1222  if (GEPI->getOperand(0) == AI)
1223    OldIdx = ~0ULL; // Force the GEP to be rewritten.
1224
1225  T = AI->getAllocatedType();
1226  uint64_t EltOffset = Offset;
1227  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
1228
1229  // If this GEP does not move the pointer across elements of the alloca
1230  // being split, then it does not need to be rewritten.
1231  if (Idx == OldIdx)
1232    return;
1233
1234  const Type *i32Ty = Type::getInt32Ty(AI->getContext());
1235  SmallVector<Value*, 8> NewArgs;
1236  NewArgs.push_back(Constant::getNullValue(i32Ty));
1237  while (EltOffset != 0) {
1238    uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
1239    NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
1240  }
1241  Instruction *Val = NewElts[Idx];
1242  if (NewArgs.size() > 1) {
1243    Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
1244                                            NewArgs.end(), "", GEPI);
1245    Val->takeName(GEPI);
1246  }
1247  if (Val->getType() != GEPI->getType())
1248    Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
1249  GEPI->replaceAllUsesWith(Val);
1250  DeadInsts.push_back(GEPI);
1251}
1252
1253/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
1254/// Rewrite it to copy or set the elements of the scalarized memory.
1255void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
1256                                        AllocaInst *AI,
1257                                        SmallVector<AllocaInst*, 32> &NewElts) {
1258  // If this is a memcpy/memmove, construct the other pointer as the
1259  // appropriate type.  The "Other" pointer is the pointer that goes to memory
1260  // that doesn't have anything to do with the alloca that we are promoting. For
1261  // memset, this Value* stays null.
1262  Value *OtherPtr = 0;
1263  unsigned MemAlignment = MI->getAlignment();
1264  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
1265    if (Inst == MTI->getRawDest())
1266      OtherPtr = MTI->getRawSource();
1267    else {
1268      assert(Inst == MTI->getRawSource());
1269      OtherPtr = MTI->getRawDest();
1270    }
1271  }
1272
1273  // If there is an other pointer, we want to convert it to the same pointer
1274  // type as AI has, so we can GEP through it safely.
1275  if (OtherPtr) {
1276    unsigned AddrSpace =
1277      cast<PointerType>(OtherPtr->getType())->getAddressSpace();
1278
1279    // Remove bitcasts and all-zero GEPs from OtherPtr.  This is an
1280    // optimization, but it's also required to detect the corner case where
1281    // both pointer operands are referencing the same memory, and where
1282    // OtherPtr may be a bitcast or GEP that is currently being rewritten.
1283    // (This function is only called for mem intrinsics that access the whole
1284    // aggregate, so non-zero GEPs are not an issue here.)
1285    OtherPtr = OtherPtr->stripPointerCasts();
1286
1287    // Copying the alloca to itself is a no-op: just delete it.
1288    if (OtherPtr == AI || OtherPtr == NewElts[0]) {
1289      // This code will run twice for a no-op memcpy -- once for each operand.
1290      // Put only one reference to MI on the DeadInsts list.
1291      for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
1292             E = DeadInsts.end(); I != E; ++I)
1293        if (*I == MI) return;
1294      DeadInsts.push_back(MI);
1295      return;
1296    }
1297
1298    // If the pointer is not the right type, insert a bitcast to the right
1299    // type.
1300    const Type *NewTy =
1301      PointerType::get(AI->getType()->getElementType(), AddrSpace);
1302
1303    if (OtherPtr->getType() != NewTy)
1304      OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
1305  }
1306
1307  // Process each element of the aggregate.
1308  Value *TheFn = MI->getCalledValue();
1309  const Type *BytePtrTy = MI->getRawDest()->getType();
1310  bool SROADest = MI->getRawDest() == Inst;
1311
1312  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
1313
1314  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1315    // If this is a memcpy/memmove, emit a GEP of the other element address.
1316    Value *OtherElt = 0;
1317    unsigned OtherEltAlign = MemAlignment;
1318
1319    if (OtherPtr) {
1320      Value *Idx[2] = { Zero,
1321                      ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
1322      OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
1323                                              OtherPtr->getName()+"."+Twine(i),
1324                                                   MI);
1325      uint64_t EltOffset;
1326      const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
1327      const Type *OtherTy = OtherPtrTy->getElementType();
1328      if (const StructType *ST = dyn_cast<StructType>(OtherTy)) {
1329        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
1330      } else {
1331        const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
1332        EltOffset = TD->getTypeAllocSize(EltTy)*i;
1333      }
1334
1335      // The alignment of the other pointer is the guaranteed alignment of the
1336      // element, which is affected by both the known alignment of the whole
1337      // mem intrinsic and the alignment of the element.  If the alignment of
1338      // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the
1339      // known alignment is just 4 bytes.
1340      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
1341    }
1342
1343    Value *EltPtr = NewElts[i];
1344    const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
1345
1346    // If we got down to a scalar, insert a load or store as appropriate.
1347    if (EltTy->isSingleValueType()) {
1348      if (isa<MemTransferInst>(MI)) {
1349        if (SROADest) {
1350          // From Other to Alloca.
1351          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
1352          new StoreInst(Elt, EltPtr, MI);
1353        } else {
1354          // From Alloca to Other.
1355          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
1356          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
1357        }
1358        continue;
1359      }
1360      assert(isa<MemSetInst>(MI));
1361
1362      // If the stored element is zero (common case), just store a null
1363      // constant.
1364      Constant *StoreVal;
1365      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
1366        if (CI->isZero()) {
1367          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
1368        } else {
1369          // If EltTy is a vector type, get the element type.
1370          const Type *ValTy = EltTy->getScalarType();
1371
1372          // Construct an integer with the right value.
1373          unsigned EltSize = TD->getTypeSizeInBits(ValTy);
1374          APInt OneVal(EltSize, CI->getZExtValue());
1375          APInt TotalVal(OneVal);
1376          // Set each byte.
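          // (Illustrative example, not from the original source: a memset value
          //  of 0xAB splatted into an i32 element produces the constant
          //  0xABABABAB; into an i16 element, 0xABAB.)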
1377          for (unsigned i = 0; 8*i < EltSize; ++i) {
1378            TotalVal = TotalVal.shl(8);
1379            TotalVal |= OneVal;
1380          }
1381
1382          // Convert the integer value to the appropriate type.
1383          StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
1384          if (ValTy->isPointerTy())
1385            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
1386          else if (ValTy->isFloatingPointTy())
1387            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
1388          assert(StoreVal->getType() == ValTy && "Type mismatch!");
1389
1390          // If the requested value was a vector constant, create it.
1391          if (EltTy != ValTy) {
1392            unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
1393            SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
1394            StoreVal = ConstantVector::get(&Elts[0], NumElts);
1395          }
1396        }
1397        new StoreInst(StoreVal, EltPtr, MI);
1398        continue;
1399      }
1400      // Otherwise, if we're storing a byte variable, use a memset call for
1401      // this element.
1402    }
1403
1404    // Cast the element pointer to BytePtrTy.
1405    if (EltPtr->getType() != BytePtrTy)
1406      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
1407
1408    // Cast the other pointer (if we have one) to BytePtrTy.
1409    if (OtherElt && OtherElt->getType() != BytePtrTy) {
1410      // Preserve address space of OtherElt
1411      const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
1412      const PointerType* PTy = cast<PointerType>(BytePtrTy);
1413      if (OtherPTy->getElementType() != PTy->getElementType()) {
1414        Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
1415                                             OtherPTy->getAddressSpace());
1416        OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
1417                                   OtherElt->getNameStr(), MI);
1418      }
1419    }
1420
1421    unsigned EltSize = TD->getTypeAllocSize(EltTy);
1422
1423    // Finally, insert the meminst for this element.
1424    if (isa<MemTransferInst>(MI)) {
1425      Value *Ops[] = {
1426        SROADest ? EltPtr : OtherElt,  // Dest ptr
1427        SROADest ? OtherElt : EltPtr,  // Src ptr
1428        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
1429        // Align
1430        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
1431        MI->getVolatileCst()
1432      };
1433      // In case we fold the address space overloaded memcpy of A to B
1434      // with memcpy of B to C, change the function to be a memcpy of A to C.
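      // (For instance, copying from address space 0 into address space 1 would
      //  pick a declaration such as llvm.memcpy.p1i8.p0i8.i64 -- an
      //  illustrative mangling, not taken from the original source.)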
1435      const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
1436                            Ops[2]->getType() };
1437      Module *M = MI->getParent()->getParent()->getParent();
1438      TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
1439      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
1440    } else {
1441      assert(isa<MemSetInst>(MI));
1442      Value *Ops[] = {
1443        EltPtr, MI->getArgOperand(1),  // Dest, Value,
1444        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
1445        Zero,  // Align
1446        ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
1447      };
1448      const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
1449      Module *M = MI->getParent()->getParent()->getParent();
1450      TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
1451      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
1452    }
1453  }
1454  DeadInsts.push_back(MI);
1455}
1456
1457/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
1458/// overwrites the entire allocation.  Extract out the pieces of the stored
1459/// integer and store them individually.
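// Hypothetical example (little-endian target, not from the original source):
// storing i64 %v over an alloca of type { i32, i32 } that was split into %A.0
// and %A.1 becomes roughly:
//   store i32 (trunc i64 %v to i32), i32* %A.0
//   store i32 (trunc i64 (lshr i64 %v, 32) to i32), i32* %A.1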
1460void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
1461                                         SmallVector<AllocaInst*, 32> &NewElts){
1462  // Extract each element out of the integer according to its structure offset
1463  // and store the element value to the individual alloca.
1464  Value *SrcVal = SI->getOperand(0);
1465  const Type *AllocaEltTy = AI->getAllocatedType();
1466  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
1467
1468  // Handle tail padding by extending the operand
1469  if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
1470    SrcVal = new ZExtInst(SrcVal,
1471                          IntegerType::get(SI->getContext(), AllocaSizeBits),
1472                          "", SI);
1473
1474  DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
1475               << '\n');
1476
1477  // There are two forms here: AI could be an array or struct.  Both cases
1478  // have different ways to compute the element offset.
1479  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
1480    const StructLayout *Layout = TD->getStructLayout(EltSTy);
1481
1482    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1483      // Get the number of bits to shift SrcVal to get the value.
1484      const Type *FieldTy = EltSTy->getElementType(i);
1485      uint64_t Shift = Layout->getElementOffsetInBits(i);
1486
1487      if (TD->isBigEndian())
1488        Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
1489
1490      Value *EltVal = SrcVal;
1491      if (Shift) {
1492        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
1493        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
1494                                            "sroa.store.elt", SI);
1495      }
1496
1497      // Truncate down to an integer of the right size.
1498      uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
1499
1500      // Ignore zero sized fields like {}, they obviously contain no data.
1501      if (FieldSizeBits == 0) continue;
1502
1503      if (FieldSizeBits != AllocaSizeBits)
1504        EltVal = new TruncInst(EltVal,
1505                             IntegerType::get(SI->getContext(), FieldSizeBits),
1506                              "", SI);
1507      Value *DestField = NewElts[i];
1508      if (EltVal->getType() == FieldTy) {
1509        // Storing to an integer field of this size, just do it.
1510      } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
1511        // Bitcast to the right element type (for fp/vector values).
1512        EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
1513      } else {
1514        // Otherwise, bitcast the dest pointer (for aggregates).
1515        DestField = new BitCastInst(DestField,
1516                              PointerType::getUnqual(EltVal->getType()),
1517                                    "", SI);
1518      }
1519      new StoreInst(EltVal, DestField, SI);
1520    }
1521
1522  } else {
1523    const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
1524    const Type *ArrayEltTy = ATy->getElementType();
1525    uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
1526    uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
1527
1528    uint64_t Shift;
1529
1530    if (TD->isBigEndian())
1531      Shift = AllocaSizeBits-ElementOffset;
1532    else
1533      Shift = 0;
1534
1535    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1536      // Ignore zero sized fields like {}, they obviously contain no data.
1537      if (ElementSizeBits == 0) continue;
1538
1539      Value *EltVal = SrcVal;
1540      if (Shift) {
1541        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
1542        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
1543                                            "sroa.store.elt", SI);
1544      }
1545
1546      // Truncate down to an integer of the right size.
1547      if (ElementSizeBits != AllocaSizeBits)
1548        EltVal = new TruncInst(EltVal,
1549                               IntegerType::get(SI->getContext(),
1550                                                ElementSizeBits),"",SI);
1551      Value *DestField = NewElts[i];
1552      if (EltVal->getType() == ArrayEltTy) {
1553        // Storing to an integer field of this size, just do it.
1554      } else if (ArrayEltTy->isFloatingPointTy() ||
1555                 ArrayEltTy->isVectorTy()) {
1556        // Bitcast to the right element type (for fp/vector values).
1557        EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
1558      } else {
1559        // Otherwise, bitcast the dest pointer (for aggregates).
1560        DestField = new BitCastInst(DestField,
1561                              PointerType::getUnqual(EltVal->getType()),
1562                                    "", SI);
1563      }
1564      new StoreInst(EltVal, DestField, SI);
1565
1566      if (TD->isBigEndian())
1567        Shift -= ElementOffset;
1568      else
1569        Shift += ElementOffset;
1570    }
1571  }
1572
1573  DeadInsts.push_back(SI);
1574}
1575
1576/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
1577/// an integer.  Load the individual pieces to form the aggregate value.
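// Hypothetical example (little-endian target, not from the original source):
// loading an i64 from an alloca of type { i32, i32 } that was split into %A.0
// and %A.1 becomes roughly:
//   %lo  = zext i32 (load i32* %A.0) to i64
//   %hi  = zext i32 (load i32* %A.1) to i64
//   %res = or i64 (shl i64 %hi, 32), %lo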
1578void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
1579                                        SmallVector<AllocaInst*, 32> &NewElts) {
1580  // Extract each element out of the NewElts according to its structure offset
1581  // and form the result value.
1582  const Type *AllocaEltTy = AI->getAllocatedType();
1583  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
1584
1585  DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
1586               << '\n');
1587
1588  // There are two forms here: AI could be an array or struct.  Both cases
1589  // have different ways to compute the element offset.
1590  const StructLayout *Layout = 0;
1591  uint64_t ArrayEltBitOffset = 0;
1592  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
1593    Layout = TD->getStructLayout(EltSTy);
1594  } else {
1595    const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
1596    ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
1597  }
1598
1599  Value *ResultVal =
1600    Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
1601
1602  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1603    // Load the value from the alloca.  If the NewElt is an aggregate, cast
1604    // the pointer to an integer of the same size before doing the load.
1605    Value *SrcField = NewElts[i];
1606    const Type *FieldTy =
1607      cast<PointerType>(SrcField->getType())->getElementType();
1608    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
1609
1610    // Ignore zero sized fields like {}, they obviously contain no data.
1611    if (FieldSizeBits == 0) continue;
1612
1613    const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
1614                                                     FieldSizeBits);
1615    if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
1616        !FieldTy->isVectorTy())
1617      SrcField = new BitCastInst(SrcField,
1618                                 PointerType::getUnqual(FieldIntTy),
1619                                 "", LI);
1620    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
1621
1622    // If SrcField is an fp or vector value of the right size, but not an
1623    // integer type, bitcast it to an integer so we can shift it.
1624    if (SrcField->getType() != FieldIntTy)
1625      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
1626
1627    // Zero extend the field to be the same size as the final alloca so that
1628    // we can shift and insert it.
1629    if (SrcField->getType() != ResultVal->getType())
1630      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
1631
1632    // Determine the number of bits to shift SrcField.
1633    uint64_t Shift;
1634    if (Layout) // Struct case.
1635      Shift = Layout->getElementOffsetInBits(i);
1636    else  // Array case.
1637      Shift = i*ArrayEltBitOffset;
1638
1639    if (TD->isBigEndian())
1640      Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
1641
1642    if (Shift) {
1643      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
1644      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
1645    }
1646
1647    // Don't create an 'or x, 0' on the first iteration.
1648    if (!isa<Constant>(ResultVal) ||
1649        !cast<Constant>(ResultVal)->isNullValue())
1650      ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
1651    else
1652      ResultVal = SrcField;
1653  }
1654
1655  // Handle tail padding by truncating the result
1656  if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
1657    ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
1658
1659  LI->replaceAllUsesWith(ResultVal);
1660  DeadInsts.push_back(LI);
1661}
1662
1663/// HasPadding - Return true if the specified type has any structure or
1664/// alignment padding, false otherwise.
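// For example (assuming a typical x86 target data layout, illustrative only):
// { i8, i32 } has 24 bits of padding between its fields and { i32, i8 } has
// 24 bits of tail padding, so both report padding; { i32, i32 } does not.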
1665static bool HasPadding(const Type *Ty, const TargetData &TD) {
1666  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
1667    const StructLayout *SL = TD.getStructLayout(STy);
1668    unsigned PrevFieldBitOffset = 0;
1669    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1670      unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
1671
1672      // Padding in sub-elements?
1673      if (HasPadding(STy->getElementType(i), TD))
1674        return true;
1675
1676      // Check to see if there is any padding between this element and the
1677      // previous one.
1678      if (i) {
1679        unsigned PrevFieldEnd =
1680          PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
1681        if (PrevFieldEnd < FieldBitOffset)
1682          return true;
1683      }
1684
1685      PrevFieldBitOffset = FieldBitOffset;
1686    }
1687
1688    //  Check for tail padding.
1689    if (unsigned EltCount = STy->getNumElements()) {
1690      unsigned PrevFieldEnd = PrevFieldBitOffset +
1691                   TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
1692      if (PrevFieldEnd < SL->getSizeInBits())
1693        return true;
1694    }
1695
1696  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1697    return HasPadding(ATy->getElementType(), TD);
1698  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1699    return HasPadding(VTy->getElementType(), TD);
1700  }
1701  return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
1702}
1703
1704/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
1705/// aggregate can be broken down into elements.  Return true if it is safe to
1706/// do so, false otherwise.
1707bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
1708  // Loop over the use list of the alloca.  We can only transform it if all of
1709  // the users are safe to transform.
1710  AllocaInfo Info;
1711
1712  isSafeForScalarRepl(AI, AI, 0, Info);
1713  if (Info.isUnsafe) {
1714    DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
1715    return false;
1716  }
1717
1718  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
1719  // source and destination, we have to be careful.  In particular, the memcpy
1720  // could be moving around elements that live in structure padding of the LLVM
1721  // types, but may actually be used.  In these cases, we refuse to promote the
1722  // struct.
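  // (For example, a memcpy of a { i8, i32 } aggregate also copies the three
  // padding bytes between the fields; if other code inspects those bytes
  // through the destination, splitting the alloca would silently drop them.)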
1723  if (Info.isMemCpySrc && Info.isMemCpyDst &&
1724      HasPadding(AI->getAllocatedType(), *TD))
1725    return false;
1726
1727  return true;
1728}
1729
1730
1731
1732/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
1733/// some part of a constant global variable.  This intentionally only accepts
1734/// constant expressions because we can't rewrite arbitrary instructions.
1735static bool PointsToConstantGlobal(Value *V) {
1736  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
1737    return GV->isConstant();
1738  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
1739    if (CE->getOpcode() == Instruction::BitCast ||
1740        CE->getOpcode() == Instruction::GetElementPtr)
1741      return PointsToConstantGlobal(CE->getOperand(0));
1742  return false;
1743}
1744
1745/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
1746/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
1747/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
1748/// track of whether it moves the pointer (with isOffset) but otherwise traverse
1749/// the uses.  If we see a memcpy/memmove that targets an unoffsetted pointer to
1750/// the alloca, and if the source pointer is a pointer to a constant global, we
1751/// can optimize this.
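// The pattern being matched, sketched as IR (illustrative only):
//   @G = constant [16 x i8] c"..."
//   %A = alloca [16 x i8]
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A.i8, i8* @G.i8, i64 16, ...)
//   ... all other uses of %A are non-volatile loads ...
// in which case the caller can replace uses of %A with @G itself.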
1752static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
1753                                           bool isOffset) {
1754  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
1755    User *U = cast<Instruction>(*UI);
1756
1757    if (LoadInst *LI = dyn_cast<LoadInst>(U))
1758      // Ignore non-volatile loads, they are always ok.
1759      if (!LI->isVolatile())
1760        continue;
1761
1762    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
1763      // If uses of the bitcast are ok, we are ok.
1764      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
1765        return false;
1766      continue;
1767    }
1768    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
1769      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
1770      // doesn't, it does.
1771      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
1772                                         isOffset || !GEP->hasAllZeroIndices()))
1773        return false;
1774      continue;
1775    }
1776
1777    // If this isn't our memcpy/memmove, reject it as something we can't
1778    // handle.
1779    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
1780    if (MI == 0)
1781      return false;
1782
1783    // If we already have seen a copy, reject the second one.
1784    if (TheCopy) return false;
1785
1786    // If the pointer has been offset from the start of the alloca, we can't
1787    // safely handle this.
1788    if (isOffset) return false;
1789
1790    // If the memintrinsic isn't using the alloca as the dest, reject it.
1791    if (UI.getOperandNo() != 0) return false;
1792
1793    // If the source of the memcpy/move is not a constant global, reject it.
1794    if (!PointsToConstantGlobal(MI->getSource()))
1795      return false;
1796
1797    // Otherwise, the transform is safe.  Remember the copy instruction.
1798    TheCopy = MI;
1799  }
1800  return true;
1801}
1802
1803/// isOnlyCopiedFromConstantGlobal - Return the copy instruction if the specified
1804/// alloca is only modified by a copy from a constant global, or null otherwise.
1805/// If so, we can replace any uses of the alloca with uses of the global directly.
1806MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
1807  MemTransferInst *TheCopy = 0;
1808  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
1809    return TheCopy;
1810  return 0;
1811}
1812