ScalarReplAggregates.cpp revision 9adc0abad3c3ed40a268ccbcee0c74cb9e1359fe
1//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This transformation implements the well known scalar replacement of
11// aggregates transformation.  This xform breaks up alloca instructions of
12// aggregate type (structure or array) into individual alloca instructions for
13// each member (if possible).  Then, if possible, it transforms the individual
14// alloca instructions into nice clean scalar SSA form.
15//
16// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
17// the two often interact, especially for C++ programs.  As such, iterating
18// between SRoA and Mem2Reg until we run out of things to promote works well.
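//
// For example (an illustrative sketch, not taken from a real test case),
// given:
//
//   %p = alloca { i32, i32 }
//   %f0 = getelementptr { i32, i32 }* %p, i32 0, i32 0
//   store i32 1, i32* %f0
//   %v = load i32* %f0
//
// SRoA splits %p into one i32 alloca per field, and Mem2Reg then promotes
// those allocas so %v becomes the constant 1 with no memory traffic at all.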
19//
20//===----------------------------------------------------------------------===//
21
22#define DEBUG_TYPE "scalarrepl"
23#include "llvm/Transforms/Scalar.h"
24#include "llvm/Constants.h"
25#include "llvm/DerivedTypes.h"
26#include "llvm/Function.h"
27#include "llvm/GlobalVariable.h"
28#include "llvm/Instructions.h"
29#include "llvm/IntrinsicInst.h"
30#include "llvm/LLVMContext.h"
31#include "llvm/Pass.h"
32#include "llvm/Analysis/Dominators.h"
33#include "llvm/Target/TargetData.h"
34#include "llvm/Transforms/Utils/PromoteMemToReg.h"
35#include "llvm/Transforms/Utils/Local.h"
36#include "llvm/Support/Debug.h"
37#include "llvm/Support/ErrorHandling.h"
38#include "llvm/Support/GetElementPtrTypeIterator.h"
39#include "llvm/Support/IRBuilder.h"
40#include "llvm/Support/MathExtras.h"
41#include "llvm/Support/Compiler.h"
42#include "llvm/ADT/SmallVector.h"
43#include "llvm/ADT/Statistic.h"
44#include "llvm/ADT/StringExtras.h"
45using namespace llvm;
46
47STATISTIC(NumReplaced,  "Number of allocas broken up");
48STATISTIC(NumPromoted,  "Number of allocas promoted");
49STATISTIC(NumConverted, "Number of aggregates converted to scalar");
50STATISTIC(NumGlobals,   "Number of allocas copied from constant global");
51
52namespace {
53  struct VISIBILITY_HIDDEN SROA : public FunctionPass {
54    static char ID; // Pass identification, replacement for typeid
55    explicit SROA(signed T = -1) : FunctionPass(&ID) {
56      if (T == -1)
57        SRThreshold = 128;
58      else
59        SRThreshold = T;
60    }
61
62    bool runOnFunction(Function &F);
63
64    bool performScalarRepl(Function &F);
65    bool performPromotion(Function &F);
66
67    // getAnalysisUsage - This pass requires dominator information and target
68    // data; we know it will not alter the CFG, so say so.
69    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
70      AU.addRequired<DominatorTree>();
71      AU.addRequired<DominanceFrontier>();
72      AU.addRequired<TargetData>();
73      AU.setPreservesCFG();
74    }
75
76  private:
77    TargetData *TD;
78
79    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
80    /// information about the uses.  All these fields are initialized to false
81    /// and set to true when something is learned.
82    struct AllocaInfo {
83      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
84      bool isUnsafe : 1;
85
86      /// needsCleanup - This is set to true if there is some use of the alloca
87      /// that requires cleanup.
88      bool needsCleanup : 1;
89
90      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
91      bool isMemCpySrc : 1;
92
93      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
94      bool isMemCpyDst : 1;
95
96      AllocaInfo()
97        : isUnsafe(false), needsCleanup(false),
98          isMemCpySrc(false), isMemCpyDst(false) {}
99    };
100
101    unsigned SRThreshold;
102
103    void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
104
105    int isSafeAllocaToScalarRepl(AllocationInst *AI);
106
107    void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
108                               AllocaInfo &Info);
109    void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
110                         AllocaInfo &Info);
111    void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
112                                        unsigned OpNo, AllocaInfo &Info);
113    void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI,
114                                        AllocaInfo &Info);
115
116    void DoScalarReplacement(AllocationInst *AI,
117                             std::vector<AllocationInst*> &WorkList);
118    void CleanupGEP(GetElementPtrInst *GEP);
119    void CleanupAllocaUsers(AllocationInst *AI);
120    AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base);
121
122    void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
123                                    SmallVector<AllocaInst*, 32> &NewElts);
124
125    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
126                                      AllocationInst *AI,
127                                      SmallVector<AllocaInst*, 32> &NewElts);
128    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI,
129                                       SmallVector<AllocaInst*, 32> &NewElts);
130    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
131                                      SmallVector<AllocaInst*, 32> &NewElts);
132
133    bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
134                            bool &SawVec, uint64_t Offset, unsigned AllocaSize);
135    void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
136    Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
137                                     uint64_t Offset, IRBuilder<> &Builder);
138    Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
139                                     uint64_t Offset, IRBuilder<> &Builder);
140    static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI);
141  };
142}
143
144char SROA::ID = 0;
145static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
146
147// Public interface to the ScalarReplAggregates pass
148FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
149  return new SROA(Threshold);
150}
151
152
153bool SROA::runOnFunction(Function &F) {
154  TD = &getAnalysis<TargetData>();
155
156  bool Changed = performPromotion(F);
157  while (1) {
158    bool LocalChange = performScalarRepl(F);
159    if (!LocalChange) break;   // No need to repromote if no scalarrepl
160    Changed = true;
161    LocalChange = performPromotion(F);
162    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
163  }
164
165  return Changed;
166}
167
168
169bool SROA::performPromotion(Function &F) {
170  std::vector<AllocaInst*> Allocas;
171  DominatorTree         &DT = getAnalysis<DominatorTree>();
172  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
173
174  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
175
176  bool Changed = false;
177
178  while (1) {
179    Allocas.clear();
180
181    // Find allocas that are safe to promote, by looking at all instructions in
182    // the entry node
183    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
184      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
185        if (isAllocaPromotable(AI))
186          Allocas.push_back(AI);
187
188    if (Allocas.empty()) break;
189
190    PromoteMemToReg(Allocas, DT, DF, Context);
191    NumPromoted += Allocas.size();
192    Changed = true;
193  }
194
195  return Changed;
196}
197
198/// getNumSAElements - Return the number of elements in the specified struct or
199/// array.
200static uint64_t getNumSAElements(const Type *T) {
201  if (const StructType *ST = dyn_cast<StructType>(T))
202    return ST->getNumElements();
203  return cast<ArrayType>(T)->getNumElements();
204}
205
206// performScalarRepl - This is a simple worklist-driven algorithm that runs on
207// all of the malloc/alloca instructions in the function, removing them if
208// they are only used by getelementptr instructions.
209//
210bool SROA::performScalarRepl(Function &F) {
211  std::vector<AllocationInst*> WorkList;
212
213  // Scan the entry basic block, adding any allocas and mallocs to the worklist.
214  BasicBlock &BB = F.getEntryBlock();
215  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
216    if (AllocationInst *A = dyn_cast<AllocationInst>(I))
217      WorkList.push_back(A);
218
219  // Process the worklist
220  bool Changed = false;
221  while (!WorkList.empty()) {
222    AllocationInst *AI = WorkList.back();
223    WorkList.pop_back();
224
225    // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
226    // with unused elements.
227    if (AI->use_empty()) {
228      AI->eraseFromParent();
229      continue;
230    }
231
232    // If this alloca is impossible for us to promote, reject it early.
233    if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
234      continue;
235
236    // Check to see if this allocation is only modified by a memcpy/memmove from
237    // a constant global.  If this is the case, we can change all users to use
238    // the constant global instead.  This is commonly produced by the CFE by
239    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
240    // is only subsequently read.
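    // For instance (an illustrative sketch; the names are made up), an alloca
    // whose only writer is something like
    //   call void @llvm.memcpy.i32(i8* %A.cast,
    //                              i8* bitcast ([9 x i32]* @init to i8*),
    //                              i32 36, i32 4)
    // and that is otherwise only read can have all of its uses rewritten to
    // refer to @init directly, after which the memcpy and the alloca are dead.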
241    if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
242      DOUT << "Found alloca equal to global: " << *AI;
243      DOUT << "  memcpy = " << *TheCopy;
244      Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
245      AI->replaceAllUsesWith(
246                        Context->getConstantExprBitCast(TheSrc, AI->getType()));
247      TheCopy->eraseFromParent();  // Don't mutate the global.
248      AI->eraseFromParent();
249      ++NumGlobals;
250      Changed = true;
251      continue;
252    }
253
254    // Check to see if we can perform the core SROA transformation.  We cannot
255    // transform the allocation instruction if it is an array allocation
256    // (allocations OF arrays are ok though), and an allocation of a scalar
257    // value cannot be decomposed at all.
258    uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
259
260    // Do not promote any struct whose size is too big.
261    if (AllocaSize > SRThreshold) continue;
262
263    if ((isa<StructType>(AI->getAllocatedType()) ||
264         isa<ArrayType>(AI->getAllocatedType())) &&
265        // Do not promote any struct into more than SRThreshold/4 separate vars.
266        getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) {
267      // Check that all of the users of the allocation are capable of being
268      // transformed.
269      switch (isSafeAllocaToScalarRepl(AI)) {
270      default: llvm_unreachable("Unexpected value!");
271      case 0:  // Not safe to scalar replace.
272        break;
273      case 1:  // Safe, but requires cleanup/canonicalizations first
274        CleanupAllocaUsers(AI);
275        // FALL THROUGH.
276      case 3:  // Safe to scalar replace.
277        DoScalarReplacement(AI, WorkList);
278        Changed = true;
279        continue;
280      }
281    }
282
283    // Check to see if we can turn this aggregate value (potentially with
284    // casts) into a simple scalar value that can be mem2reg'd into a register.
285    // IsNotTrivial tracks whether this is something that mem2reg could have
286    // promoted itself.  If so, we don't want to transform it needlessly.  Note
287    // that we can't just check based on the type: the alloca may be of an i32
288    // but that has pointer arithmetic to set byte 3 of it or something.
289    bool IsNotTrivial = false;
290    const Type *VectorTy = 0;
291    bool HadAVector = false;
292    if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector,
293                           0, unsigned(AllocaSize)) && IsNotTrivial) {
294      AllocaInst *NewAI;
295      // If we were able to find a vector type that can handle this with
296      // insert/extract elements, and if there was at least one use that had
297      // a vector type, promote this to a vector.  We don't want to promote
298      // random stuff that doesn't use vectors (e.g. <9 x double>) because then
299      // we just get a lot of insert/extracts.  If at least one vector is
300      // involved, then we probably really do have a union of vector/array.
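      // For example (an illustrative sketch): a 16-byte alloca with one store
      // of a whole <4 x float> and another store of a float to byte offset 8
      // becomes a single <4 x float> alloca; the scalar store is rewritten as
      // an insertelement, which mem2reg can then promote to pure SSA form.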
301      if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
302        DOUT << "CONVERT TO VECTOR: " << *AI << "  TYPE = " << *VectorTy <<"\n";
303
304        // Create and insert the vector alloca.
305        NewAI = new AllocaInst(*Context, VectorTy, 0, "",
306                               AI->getParent()->begin());
307        ConvertUsesToScalar(AI, NewAI, 0);
308      } else {
309        DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n";
310
311        // Create and insert the integer alloca.
312        const Type *NewTy = Context->getIntegerType(AllocaSize*8);
313        NewAI = new AllocaInst(*Context, NewTy, 0, "",
314                               AI->getParent()->begin());
315        ConvertUsesToScalar(AI, NewAI, 0);
316      }
317      NewAI->takeName(AI);
318      AI->eraseFromParent();
319      ++NumConverted;
320      Changed = true;
321      continue;
322    }
323
324    // Otherwise, couldn't process this alloca.
325  }
326
327  return Changed;
328}
329
330/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
331/// predicate, do SROA now.
332void SROA::DoScalarReplacement(AllocationInst *AI,
333                               std::vector<AllocationInst*> &WorkList) {
334  DOUT << "Found inst to SROA: " << *AI;
335  SmallVector<AllocaInst*, 32> ElementAllocas;
336  if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
337    ElementAllocas.reserve(ST->getNumContainedTypes());
338    for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
339      AllocaInst *NA = new AllocaInst(*Context,
340                                      ST->getContainedType(i), 0,
341                                      AI->getAlignment(),
342                                      AI->getName() + "." + utostr(i), AI);
343      ElementAllocas.push_back(NA);
344      WorkList.push_back(NA);  // Add to worklist for recursive processing
345    }
346  } else {
347    const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
348    ElementAllocas.reserve(AT->getNumElements());
349    const Type *ElTy = AT->getElementType();
350    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
351      AllocaInst *NA = new AllocaInst(*Context, ElTy, 0, AI->getAlignment(),
352                                      AI->getName() + "." + utostr(i), AI);
353      ElementAllocas.push_back(NA);
354      WorkList.push_back(NA);  // Add to worklist for recursive processing
355    }
356  }
357
358  // Now that we have created the alloca instructions that we want to use,
359  // expand the getelementptr instructions to use them.
360  //
361  while (!AI->use_empty()) {
362    Instruction *User = cast<Instruction>(AI->use_back());
363    if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
364      RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
365      BCInst->eraseFromParent();
366      continue;
367    }
368
369    // Replace:
370    //   %res = load { i32, i32 }* %alloc
371    // with:
372    //   %load.0 = load i32* %alloc.0
373    //   %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
374    //   %load.1 = load i32* %alloc.1
375    //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
376    // (Also works for arrays instead of structs)
377    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
378      Value *Insert = Context->getUndef(LI->getType());
379      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
380        Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
381        Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
382      }
383      LI->replaceAllUsesWith(Insert);
384      LI->eraseFromParent();
385      continue;
386    }
387
388    // Replace:
389    //   store { i32, i32 } %val, { i32, i32 }* %alloc
390    // with:
391    //   %val.0 = extractvalue { i32, i32 } %val, 0
392    //   store i32 %val.0, i32* %alloc.0
393    //   %val.1 = extractvalue { i32, i32 } %val, 1
394    //   store i32 %val.1, i32* %alloc.1
395    // (Also works for arrays instead of structs)
396    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
397      Value *Val = SI->getOperand(0);
398      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
399        Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
400        new StoreInst(Extract, ElementAllocas[i], SI);
401      }
402      SI->eraseFromParent();
403      continue;
404    }
405
406    GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
407    // We now know that the GEP is of the form: GEP <ptr>, 0, <cst>
408    unsigned Idx =
409       (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
410
411    assert(Idx < ElementAllocas.size() && "Index out of range?");
412    AllocaInst *AllocaToUse = ElementAllocas[Idx];
413
414    Value *RepValue;
415    if (GEPI->getNumOperands() == 3) {
416      // Do not insert a new getelementptr instruction with zero indices, only
417      // to have it optimized out later.
418      RepValue = AllocaToUse;
419    } else {
420      // We are indexing deeply into the structure, so we still need a
421      // getelementptr instruction to finish the indexing.  It may itself be
422      // expanded once the worklist is rerun.
423      //
424      SmallVector<Value*, 8> NewArgs;
425      NewArgs.push_back(Context->getNullValue(Type::Int32Ty));
426      NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
427      RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
428                                           NewArgs.end(), "", GEPI);
429      RepValue->takeName(GEPI);
430    }
431
432    // If this GEP is to the start of the aggregate, check for memcpys.
433    if (Idx == 0 && GEPI->hasAllZeroIndices())
434      RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);
435
436    // Move all of the users over to the new GEP.
437    GEPI->replaceAllUsesWith(RepValue);
438    // Delete the old GEP
439    GEPI->eraseFromParent();
440  }
441
442  // Finally, delete the Alloca instruction
443  AI->eraseFromParent();
444  NumReplaced++;
445}
446
447
448/// isSafeElementUse - Check to see if this use is an allowed use for a
449/// getelementptr instruction of an array aggregate allocation.  isFirstElt
450/// indicates whether Ptr is known to point to the start of the aggregate.
451///
452void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
453                            AllocaInfo &Info) {
454  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
455       I != E; ++I) {
456    Instruction *User = cast<Instruction>(*I);
457    switch (User->getOpcode()) {
458    case Instruction::Load:  break;
459    case Instruction::Store:
460      // Store is ok if storing INTO the pointer, not storing the pointer
461      if (User->getOperand(0) == Ptr) return MarkUnsafe(Info);
462      break;
463    case Instruction::GetElementPtr: {
464      GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
465      bool AreAllZeroIndices = isFirstElt;
466      if (GEP->getNumOperands() > 1) {
467        if (!isa<ConstantInt>(GEP->getOperand(1)) ||
468            !cast<ConstantInt>(GEP->getOperand(1))->isZero())
469          // Using pointer arithmetic to navigate the array.
470          return MarkUnsafe(Info);
471
472        if (AreAllZeroIndices)
473          AreAllZeroIndices = GEP->hasAllZeroIndices();
474      }
475      isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
476      if (Info.isUnsafe) return;
477      break;
478    }
479    case Instruction::BitCast:
480      if (isFirstElt) {
481        isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info);
482        if (Info.isUnsafe) return;
483        break;
484      }
485      DOUT << "  Transformation preventing inst: " << *User;
486      return MarkUnsafe(Info);
487    case Instruction::Call:
488      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
489        if (isFirstElt) {
490          isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info);
491          if (Info.isUnsafe) return;
492          break;
493        }
494      }
495      DOUT << "  Transformation preventing inst: " << *User;
496      return MarkUnsafe(Info);
497    default:
498      DOUT << "  Transformation preventing inst: " << *User;
499      return MarkUnsafe(Info);
500    }
501  }
502  return;  // All users look ok :)
503}
504
505/// AllUsersAreLoads - Return true if all users of this value are loads.
506static bool AllUsersAreLoads(Value *Ptr) {
507  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
508       I != E; ++I)
509    if (cast<Instruction>(*I)->getOpcode() != Instruction::Load)
510      return false;
511  return true;
512}
513
514/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an
515/// aggregate allocation.
516///
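/// As a rough sketch (type and value names here are illustrative), the only
/// GEPs accepted look like
///   getelementptr %struct.T* %A, i32 0, i32 <cst>
/// i.e. a zero first index followed by constant, in-range element indices; a
/// variable array index is tolerated only in the one- or two-element cases
/// that CleanupGEP can canonicalize away.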
517void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
518                                 AllocaInfo &Info) {
519  if (BitCastInst *C = dyn_cast<BitCastInst>(User))
520    return isSafeUseOfBitCastedAllocation(C, AI, Info);
521
522  if (LoadInst *LI = dyn_cast<LoadInst>(User))
523    if (!LI->isVolatile())
524      return;// Loads (returning a first class aggregate) are always rewritable
525
526  if (StoreInst *SI = dyn_cast<StoreInst>(User))
527    if (!SI->isVolatile() && SI->getOperand(0) != AI)
528      return;// Store is ok if storing INTO the pointer, not storing the pointer
529
530  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
531  if (GEPI == 0)
532    return MarkUnsafe(Info);
533
534  gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI);
535
536  // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
537  if (I == E ||
538      I.getOperand() != Context->getNullValue(I.getOperand()->getType())) {
539    return MarkUnsafe(Info);
540  }
541
542  ++I;
543  if (I == E) return MarkUnsafe(Info);  // ran out of GEP indices??
544
545  bool IsAllZeroIndices = true;
546
547  // If the first index is a non-constant index into an array, see if we can
548  // handle it as a special case.
549  if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
550    if (!isa<ConstantInt>(I.getOperand())) {
551      IsAllZeroIndices = false;
552      uint64_t NumElements = AT->getNumElements();
553
554      // If this is an array index and the index is not constant, we cannot
555      // promote... that is unless the array has exactly one or two elements in
556      // it, in which case we CAN promote it, but we have to canonicalize this
557      // out if this is the only problem.
558      if ((NumElements == 1 || NumElements == 2) &&
559          AllUsersAreLoads(GEPI)) {
560        Info.needsCleanup = true;
561        return;  // Canonicalization required!
562      }
563      return MarkUnsafe(Info);
564    }
565  }
566
567  // Walk through the GEP type indices, checking the types that this indexes
568  // into.
569  for (; I != E; ++I) {
570    // Ignore struct elements, no extra checking needed for these.
571    if (isa<StructType>(*I))
572      continue;
573
574    ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
575    if (!IdxVal) return MarkUnsafe(Info);
576
577    // Are all indices still zero?
578    IsAllZeroIndices &= IdxVal->isZero();
579
580    if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
581      // This GEP indexes an array.  Verify that this is an in-range constant
582      // integer. Specifically, consider A[0][i]. We cannot know that the user
583      // isn't doing invalid things like allowing i to index an out-of-range
584      // subscript that accesses A[1].  Because of this, we have to reject SROA
585      // of any accesses into structs where any of the components are variables.
586      if (IdxVal->getZExtValue() >= AT->getNumElements())
587        return MarkUnsafe(Info);
588    } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
589      if (IdxVal->getZExtValue() >= VT->getNumElements())
590        return MarkUnsafe(Info);
591    }
592  }
593
594  // If there are any non-simple uses of this getelementptr, make sure to reject
595  // them.
596  return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
597}
598
599/// isSafeMemIntrinsicOnAllocation - Check whether the specified memory
600/// intrinsic can be promoted by SROA.  At this point, we know that the operand
601/// of the memintrinsic is a pointer to the beginning of the allocation.
602void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
603                                          unsigned OpNo, AllocaInfo &Info) {
604  // If not constant length, give up.
605  ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
606  if (!Length) return MarkUnsafe(Info);
607
608  // If not the whole aggregate, give up.
609  if (Length->getZExtValue() !=
610      TD->getTypeAllocSize(AI->getType()->getElementType()))
611    return MarkUnsafe(Info);
612
613  // We only know about memcpy/memset/memmove.
614  if (!isa<MemIntrinsic>(MI))
615    return MarkUnsafe(Info);
616
617  // Otherwise, we can transform it.  Determine whether this is a memcpy/set
618  // into or out of the aggregate.
619  if (OpNo == 1)
620    Info.isMemCpyDst = true;
621  else {
622    assert(OpNo == 2);
623    Info.isMemCpySrc = true;
624  }
625}
626
627/// isSafeUseOfBitCastedAllocation - Check whether all users of this bitcast
628/// of the allocation are safe to transform, marking Info unsafe otherwise.
629void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI,
630                                          AllocaInfo &Info) {
631  for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
632       UI != E; ++UI) {
633    if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) {
634      isSafeUseOfBitCastedAllocation(BCU, AI, Info);
635    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
636      isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
637    } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
638      if (SI->isVolatile())
639        return MarkUnsafe(Info);
640
641      // If storing the entire alloca in one chunk through a bitcasted pointer
642      // to integer, we can transform it.  This happens (for example) when you
643      // cast a {i32,i32}* to i64* and store through it.  This is similar to the
644      // memcpy case and occurs in various "byval" cases and emulated memcpys.
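      // Illustrative example:
      //   %p = bitcast { i32, i32 }* %A to i64*
      //   store i64 %v, i64* %p
      // overwrites both fields at once, so it is treated like a memcpy
      // destination here and split apart later.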
645      if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
646          TD->getTypeAllocSize(SI->getOperand(0)->getType()) ==
647          TD->getTypeAllocSize(AI->getType()->getElementType())) {
648        Info.isMemCpyDst = true;
649        continue;
650      }
651      return MarkUnsafe(Info);
652    } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
653      if (LI->isVolatile())
654        return MarkUnsafe(Info);
655
656      // If loading the entire alloca in one chunk through a bitcasted pointer
657      // to integer, we can transform it.  This happens (for example) when you
658      // cast a {i32,i32}* to i64* and load through it.  This is similar to the
659      // memcpy case and occurs in various "byval" cases and emulated memcpys.
660      if (isa<IntegerType>(LI->getType()) &&
661          TD->getTypeAllocSize(LI->getType()) ==
662          TD->getTypeAllocSize(AI->getType()->getElementType())) {
663        Info.isMemCpySrc = true;
664        continue;
665      }
666      return MarkUnsafe(Info);
667    } else if (isa<DbgInfoIntrinsic>(UI)) {
668      // If one user is DbgInfoIntrinsic then check if all users are
669      // DbgInfoIntrinsics.
670      if (OnlyUsedByDbgInfoIntrinsics(BC)) {
671        Info.needsCleanup = true;
672        return;
673      }
674      else
675        MarkUnsafe(Info);
676    }
677    else {
678      return MarkUnsafe(Info);
679    }
680    if (Info.isUnsafe) return;
681  }
682}
683
684/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
685/// to its first element.  Transform users of the cast to use the new values
686/// instead.
687void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
688                                      SmallVector<AllocaInst*, 32> &NewElts) {
689  Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();
690  while (UI != UE) {
691    Instruction *User = cast<Instruction>(*UI++);
692    if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
693      RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
694      if (BCU->use_empty()) BCU->eraseFromParent();
695      continue;
696    }
697
698    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
699      // This must be memcpy/memmove/memset of the entire aggregate.
700      // Split into one per element.
701      RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
702      continue;
703    }
704
705    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
706      // If this is a store of the entire alloca from an integer, rewrite it.
707      RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
708      continue;
709    }
710
711    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
712      // If this is a load of the entire alloca to an integer, rewrite it.
713      RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
714      continue;
715    }
716
717    // Otherwise it must be some other user of a gep of the first pointer.  Just
718    // leave these alone.
719    continue;
720  }
721}
722
723/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
724/// Rewrite it to copy or set the elements of the scalarized memory.
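/// As an illustrative sketch, a memcpy that covers a whole { i32, float }
/// alloca is replaced by two per-element transfers: for each new element
/// alloca we GEP to the matching offset in the other pointer and either emit
/// a load/store pair (for scalar elements) or a smaller memcpy/memset (for
/// aggregate elements), preserving the best known alignment of each piece.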
725void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
726                                        AllocationInst *AI,
727                                        SmallVector<AllocaInst*, 32> &NewElts) {
728
729  // If this is a memcpy/memmove, construct the other pointer as the
730  // appropriate type.  The "Other" pointer is the pointer that goes to memory
731  // that doesn't have anything to do with the alloca that we are promoting. For
732  // memset, this Value* stays null.
733  Value *OtherPtr = 0;
734  unsigned MemAlignment = MI->getAlignment();
735  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
736    if (BCInst == MTI->getRawDest())
737      OtherPtr = MTI->getRawSource();
738    else {
739      assert(BCInst == MTI->getRawSource());
740      OtherPtr = MTI->getRawDest();
741    }
742  }
743
744  // If there is an other pointer, we want to convert it to the same pointer
745  // type as AI has, so we can GEP through it safely.
746  if (OtherPtr) {
747    // It is likely that OtherPtr is a bitcast, if so, remove it.
748    if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr))
749      OtherPtr = BC->getOperand(0);
750    // All zero GEPs are effectively bitcasts.
751    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr))
752      if (GEP->hasAllZeroIndices())
753        OtherPtr = GEP->getOperand(0);
754
755    if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
756      if (BCE->getOpcode() == Instruction::BitCast)
757        OtherPtr = BCE->getOperand(0);
758
759    // If the pointer is not the right type, insert a bitcast to the right
760    // type.
761    if (OtherPtr->getType() != AI->getType())
762      OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
763                                 MI);
764  }
765
766  // Process each element of the aggregate.
767  Value *TheFn = MI->getOperand(0);
768  const Type *BytePtrTy = MI->getRawDest()->getType();
769  bool SROADest = MI->getRawDest() == BCInst;
770
771  Constant *Zero = Context->getNullValue(Type::Int32Ty);
772
773  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
774    // If this is a memcpy/memmove, emit a GEP of the other element address.
775    Value *OtherElt = 0;
776    unsigned OtherEltAlign = MemAlignment;
777
778    if (OtherPtr) {
779      Value *Idx[2] = { Zero, Context->getConstantInt(Type::Int32Ty, i) };
780      OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
781                                           OtherPtr->getNameStr()+"."+utostr(i),
782                                           MI);
783      uint64_t EltOffset;
784      const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
785      if (const StructType *ST =
786            dyn_cast<StructType>(OtherPtrTy->getElementType())) {
787        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
788      } else {
789        const Type *EltTy =
790          cast<SequentialType>(OtherPtr->getType())->getElementType();
791        EltOffset = TD->getTypeAllocSize(EltTy)*i;
792      }
793
794      // The alignment of the other pointer is the guaranteed alignment of the
795      // element, which is affected by both the known alignment of the whole
796      // mem intrinsic and the alignment of the element.  If the alignment of
797      // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the
798      // known alignment is just 4 bytes.
799      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
800    }
801
802    Value *EltPtr = NewElts[i];
803    const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
804
805    // If we got down to a scalar, insert a load or store as appropriate.
806    if (EltTy->isSingleValueType()) {
807      if (isa<MemTransferInst>(MI)) {
808        if (SROADest) {
809          // From Other to Alloca.
810          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
811          new StoreInst(Elt, EltPtr, MI);
812        } else {
813          // From Alloca to Other.
814          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
815          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
816        }
817        continue;
818      }
819      assert(isa<MemSetInst>(MI));
820
821      // If the stored element is zero (common case), just store a null
822      // constant.
823      Constant *StoreVal;
824      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
825        if (CI->isZero()) {
826          StoreVal = Context->getNullValue(EltTy);  // 0.0, null, 0, <0,0>
827        } else {
828          // If EltTy is a vector type, get the element type.
829          const Type *ValTy = EltTy->getScalarType();
830
831          // Construct an integer with the right value.
832          unsigned EltSize = TD->getTypeSizeInBits(ValTy);
833          APInt OneVal(EltSize, CI->getZExtValue());
834          APInt TotalVal(OneVal);
835          // Set each byte.
836          for (unsigned i = 0; 8*i < EltSize; ++i) {
837            TotalVal = TotalVal.shl(8);
838            TotalVal |= OneVal;
839          }
840
841          // Convert the integer value to the appropriate type.
842          StoreVal = Context->getConstantInt(TotalVal);
843          if (isa<PointerType>(ValTy))
844            StoreVal = Context->getConstantExprIntToPtr(StoreVal, ValTy);
845          else if (ValTy->isFloatingPoint())
846            StoreVal = Context->getConstantExprBitCast(StoreVal, ValTy);
847          assert(StoreVal->getType() == ValTy && "Type mismatch!");
848
849          // If the requested value was a vector constant, create it.
850          if (EltTy != ValTy) {
851            unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
852            SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
853            StoreVal = Context->getConstantVector(&Elts[0], NumElts);
854          }
855        }
856        new StoreInst(StoreVal, EltPtr, MI);
857        continue;
858      }
859      // Otherwise, if we're storing a byte variable, use a memset call for
860      // this element.
861    }
862
863    // Cast the element pointer to BytePtrTy.
864    if (EltPtr->getType() != BytePtrTy)
865      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI);
866
867    // Cast the other pointer (if we have one) to BytePtrTy.
868    if (OtherElt && OtherElt->getType() != BytePtrTy)
869      OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(),
870                                 MI);
871
872    unsigned EltSize = TD->getTypeAllocSize(EltTy);
873
874    // Finally, insert the meminst for this element.
875    if (isa<MemTransferInst>(MI)) {
876      Value *Ops[] = {
877        SROADest ? EltPtr : OtherElt,  // Dest ptr
878        SROADest ? OtherElt : EltPtr,  // Src ptr
879        Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size
880        Context->getConstantInt(Type::Int32Ty, OtherEltAlign)  // Align
881      };
882      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
883    } else {
884      assert(isa<MemSetInst>(MI));
885      Value *Ops[] = {
886        EltPtr, MI->getOperand(2),  // Dest, Value,
887        Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size
888        Zero  // Align
889      };
890      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
891    }
892  }
893  MI->eraseFromParent();
894}
895
896/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
897/// overwrites the entire allocation.  Extract out the pieces of the stored
898/// integer and store them individually.
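/// For example (an illustrative sketch assuming a little-endian target), for
///   %A = alloca { i32, i32 }
///   store i64 %v, i64* %p        ; where %p is a bitcast of %A
/// we emit roughly:
///   %sh = lshr i64 %v, 32
///   %lo = trunc i64 %v to i32    ; stored to the alloca for field 0
///   %hi = trunc i64 %sh to i32   ; stored to the alloca for field 1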
899void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,
900                                         AllocationInst *AI,
901                                         SmallVector<AllocaInst*, 32> &NewElts){
902  // Extract each element out of the integer according to its structure offset
903  // and store the element value to the individual alloca.
904  Value *SrcVal = SI->getOperand(0);
905  const Type *AllocaEltTy = AI->getType()->getElementType();
906  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
907
908  // If this isn't a store of an integer to the whole alloca, it may be a store
909  // to the first element.  Just ignore the store in this case and normal SROA
910  // will handle it.
911  if (!isa<IntegerType>(SrcVal->getType()) ||
912      TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits)
913    return;
914  // Handle tail padding by extending the operand
915  if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
916    SrcVal = new ZExtInst(SrcVal,
917                          Context->getIntegerType(AllocaSizeBits), "", SI);
918
919  DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI;
920
921  // There are two forms here: AI could be an array or struct.  Both cases
922  // have different ways to compute the element offset.
923  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
924    const StructLayout *Layout = TD->getStructLayout(EltSTy);
925
926    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
927      // Get the number of bits to shift SrcVal to get the value.
928      const Type *FieldTy = EltSTy->getElementType(i);
929      uint64_t Shift = Layout->getElementOffsetInBits(i);
930
931      if (TD->isBigEndian())
932        Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
933
934      Value *EltVal = SrcVal;
935      if (Shift) {
936        Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift);
937        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
938                                            "sroa.store.elt", SI);
939      }
940
941      // Truncate down to an integer of the right size.
942      uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
943
944      // Ignore zero sized fields like {}, they obviously contain no data.
945      if (FieldSizeBits == 0) continue;
946
947      if (FieldSizeBits != AllocaSizeBits)
948        EltVal = new TruncInst(EltVal,
949                               Context->getIntegerType(FieldSizeBits), "", SI);
950      Value *DestField = NewElts[i];
951      if (EltVal->getType() == FieldTy) {
952        // Storing to an integer field of this size, just do it.
953      } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) {
954        // Bitcast to the right element type (for fp/vector values).
955        EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
956      } else {
957        // Otherwise, bitcast the dest pointer (for aggregates).
958        DestField = new BitCastInst(DestField,
959                              Context->getPointerTypeUnqual(EltVal->getType()),
960                                    "", SI);
961      }
962      new StoreInst(EltVal, DestField, SI);
963    }
964
965  } else {
966    const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
967    const Type *ArrayEltTy = ATy->getElementType();
968    uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
969    uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
970
971    uint64_t Shift;
972
973    if (TD->isBigEndian())
974      Shift = AllocaSizeBits-ElementOffset;
975    else
976      Shift = 0;
977
978    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
979      // Ignore zero sized fields like {}, they obviously contain no data.
980      if (ElementSizeBits == 0) continue;
981
982      Value *EltVal = SrcVal;
983      if (Shift) {
984        Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift);
985        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
986                                            "sroa.store.elt", SI);
987      }
988
989      // Truncate down to an integer of the right size.
990      if (ElementSizeBits != AllocaSizeBits)
991        EltVal = new TruncInst(EltVal,
992                               Context->getIntegerType(ElementSizeBits),"",SI);
993      Value *DestField = NewElts[i];
994      if (EltVal->getType() == ArrayEltTy) {
995        // Storing to an integer field of this size, just do it.
996      } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) {
997        // Bitcast to the right element type (for fp/vector values).
998        EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
999      } else {
1000        // Otherwise, bitcast the dest pointer (for aggregates).
1001        DestField = new BitCastInst(DestField,
1002                              Context->getPointerTypeUnqual(EltVal->getType()),
1003                                    "", SI);
1004      }
1005      new StoreInst(EltVal, DestField, SI);
1006
1007      if (TD->isBigEndian())
1008        Shift -= ElementOffset;
1009      else
1010        Shift += ElementOffset;
1011    }
1012  }
1013
1014  SI->eraseFromParent();
1015}
1016
1017/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
1018/// an integer.  Load the individual pieces to form the aggregate value.
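/// For example (an illustrative sketch assuming a little-endian target),
/// loading a whole { i32, i32 } alloca as an i64 becomes roughly:
///   %lo = zext i32 %elt0 to i64
///   %hi = zext i32 %elt1 to i64
///   %hi.sh = shl i64 %hi, 32
///   %res = or i64 %hi.sh, %lo
/// where %elt0 and %elt1 are loads of the two element allocas.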
1019void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
1020                                        SmallVector<AllocaInst*, 32> &NewElts) {
1021  // Extract each element out of the NewElts according to its structure offset
1022  // and form the result value.
1023  const Type *AllocaEltTy = AI->getType()->getElementType();
1024  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
1025
1026  // If this isn't a load of the whole alloca to an integer, it may be a load
1027  // of the first element.  Just ignore the load in this case and normal SROA
1028  // will handle it.
1029  if (!isa<IntegerType>(LI->getType()) ||
1030      TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits)
1031    return;
1032
1033  DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI;
1034
1035  // There are two forms here: AI could be an array or struct.  Both cases
1036  // have different ways to compute the element offset.
1037  const StructLayout *Layout = 0;
1038  uint64_t ArrayEltBitOffset = 0;
1039  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
1040    Layout = TD->getStructLayout(EltSTy);
1041  } else {
1042    const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
1043    ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
1044  }
1045
1046  Value *ResultVal =
1047                 Context->getNullValue(Context->getIntegerType(AllocaSizeBits));
1048
1049  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1050    // Load the value from the alloca.  If the NewElt is an aggregate, cast
1051    // the pointer to an integer of the same size before doing the load.
1052    Value *SrcField = NewElts[i];
1053    const Type *FieldTy =
1054      cast<PointerType>(SrcField->getType())->getElementType();
1055    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
1056
1057    // Ignore zero sized fields like {}, they obviously contain no data.
1058    if (FieldSizeBits == 0) continue;
1059
1060    const IntegerType *FieldIntTy = Context->getIntegerType(FieldSizeBits);
1061    if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&
1062        !isa<VectorType>(FieldTy))
1063      SrcField = new BitCastInst(SrcField,
1064                                 Context->getPointerTypeUnqual(FieldIntTy),
1065                                 "", LI);
1066    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
1067
1068    // If SrcField is a fp or vector of the right size but that isn't an
1069    // integer type, bitcast to an integer so we can shift it.
1070    if (SrcField->getType() != FieldIntTy)
1071      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
1072
1073    // Zero extend the field to be the same size as the final alloca so that
1074    // we can shift and insert it.
1075    if (SrcField->getType() != ResultVal->getType())
1076      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
1077
1078    // Determine the number of bits to shift SrcField.
1079    uint64_t Shift;
1080    if (Layout) // Struct case.
1081      Shift = Layout->getElementOffsetInBits(i);
1082    else  // Array case.
1083      Shift = i*ArrayEltBitOffset;
1084
1085    if (TD->isBigEndian())
1086      Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
1087
1088    if (Shift) {
1089      Value *ShiftVal = Context->getConstantInt(SrcField->getType(), Shift);
1090      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
1091    }
1092
1093    ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
1094  }
1095
1096  // Handle tail padding by truncating the result
1097  if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
1098    ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
1099
1100  LI->replaceAllUsesWith(ResultVal);
1101  LI->eraseFromParent();
1102}
1103
1104
1105/// HasPadding - Return true if the specified type has any structure or
1106/// alignment padding, false otherwise.
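/// For example (assuming the usual 4-byte alignment for i32), { i8, i32 } has
/// 24 bits of padding between its fields and { i32, i8 } has 24 bits of tail
/// padding, so both report true here, while { i32, i32 } does not.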
1107static bool HasPadding(const Type *Ty, const TargetData &TD) {
1108  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
1109    const StructLayout *SL = TD.getStructLayout(STy);
1110    unsigned PrevFieldBitOffset = 0;
1111    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1112      unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
1113
1114      // Padding in sub-elements?
1115      if (HasPadding(STy->getElementType(i), TD))
1116        return true;
1117
1118      // Check to see if there is any padding between this element and the
1119      // previous one.
1120      if (i) {
1121        unsigned PrevFieldEnd =
1122        PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
1123        if (PrevFieldEnd < FieldBitOffset)
1124          return true;
1125      }
1126
1127      PrevFieldBitOffset = FieldBitOffset;
1128    }
1129
1130    //  Check for tail padding.
1131    if (unsigned EltCount = STy->getNumElements()) {
1132      unsigned PrevFieldEnd = PrevFieldBitOffset +
1133                   TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
1134      if (PrevFieldEnd < SL->getSizeInBits())
1135        return true;
1136    }
1137
1138  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1139    return HasPadding(ATy->getElementType(), TD);
1140  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1141    return HasPadding(VTy->getElementType(), TD);
1142  }
1143  return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
1144}
1145
1146/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
1147/// aggregate can be broken down into elements.  Return 0 if not, 3 if safe,
1148/// or 1 if safe after canonicalization has been performed.
1149///
1150int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {
1151  // Loop over the use list of the alloca.  We can only transform it if all of
1152  // the users are safe to transform.
1153  AllocaInfo Info;
1154
1155  for (Value::use_iterator I = AI->use_begin(), E = AI->use_end();
1156       I != E; ++I) {
1157    isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
1158    if (Info.isUnsafe) {
1159      DOUT << "Cannot transform: " << *AI << "  due to user: " << **I;
1160      return 0;
1161    }
1162  }
1163
1164  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
1165  // source and destination, we have to be careful.  In particular, the memcpy
1166  // could be moving around elements that live in structure padding of the LLVM
1167  // types, but may actually be used.  In these cases, we refuse to promote the
1168  // struct.
1169  if (Info.isMemCpySrc && Info.isMemCpyDst &&
1170      HasPadding(AI->getType()->getElementType(), *TD))
1171    return 0;
1172
1173  // If we require cleanup, return 1, otherwise return 3.
1174  return Info.needsCleanup ? 1 : 3;
1175}
1176
1177/// CleanupGEP - GEP is used by an Alloca, which can be promoted after the GEP
1178/// is canonicalized here.
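/// For example (an illustrative sketch), a load of A[i] where A is a
/// [2 x i32] alloca and i is not constant is rewritten into loads of A[0] and
/// A[1] plus a select on "i != 0", so no variable index remains to block SROA.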
1179void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
1180  gep_type_iterator I = gep_type_begin(GEPI);
1181  ++I;
1182
1183  const ArrayType *AT = dyn_cast<ArrayType>(*I);
1184  if (!AT)
1185    return;
1186
1187  uint64_t NumElements = AT->getNumElements();
1188
1189  if (isa<ConstantInt>(I.getOperand()))
1190    return;
1191
1192  if (NumElements == 1) {
1193    GEPI->setOperand(2, Context->getNullValue(Type::Int32Ty));
1194    return;
1195  }
1196
1197  assert(NumElements == 2 && "Unhandled case!");
1198  // All users of the GEP must be loads.  At each use of the GEP, insert
1199  // two loads of the appropriate indexed GEP and select between them.
1200  Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(),
1201                              Context->getNullValue(I.getOperand()->getType()),
1202                              "isone");
1203  // Insert the new GEP instructions, which are properly indexed.
1204  SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
1205  Indices[1] = Context->getNullValue(Type::Int32Ty);
1206  Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
1207                                             Indices.begin(),
1208                                             Indices.end(),
1209                                             GEPI->getName()+".0", GEPI);
1210  Indices[1] = Context->getConstantInt(Type::Int32Ty, 1);
1211  Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
1212                                            Indices.begin(),
1213                                            Indices.end(),
1214                                            GEPI->getName()+".1", GEPI);
1215  // Replace all loads of the variable index GEP with loads from both
1216  // indexes and a select.
1217  while (!GEPI->use_empty()) {
1218    LoadInst *LI = cast<LoadInst>(GEPI->use_back());
1219    Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI);
1220    Value *One  = new LoadInst(OneIdx , LI->getName()+".1", LI);
1221    Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI);
1222    LI->replaceAllUsesWith(R);
1223    LI->eraseFromParent();
1224  }
1225  GEPI->eraseFromParent();
1226}
1227
1228
1229/// CleanupAllocaUsers - If SROA reported that it can promote the specified
1230/// allocation, but only if cleaned up, perform the cleanups required.
1231void SROA::CleanupAllocaUsers(AllocationInst *AI) {
1232  // At this point, we know that the end result will be SROA'd and promoted, so
1233  // we can insert ugly code if required so long as sroa+mem2reg will clean it
1234  // up.
1235  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
1236       UI != E; ) {
1237    User *U = *UI++;
1238    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
1239      CleanupGEP(GEPI);
1240    else {
1241      Instruction *I = cast<Instruction>(U);
1242      SmallVector<DbgInfoIntrinsic *, 2> DbgInUses;
1243      if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
1244        // Safe to remove debug info uses.
1245        while (!DbgInUses.empty()) {
1246          DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back();
1247          DI->eraseFromParent();
1248        }
1249        I->eraseFromParent();
1250      }
1251    }
1252  }
1253}
1254
1255/// MergeInType - Add the 'In' type to the accumulated vector type (VecTy) so
1256/// far at the offset specified by Offset (which is specified in bytes).
1257///
1258/// There are two cases we handle here:
1259///   1) A union of vector types of the same size and potentially its elements.
1260///      Here we turn element accesses into insert/extract element operations.
1261///      This promotes a <4 x float> with a store of float to the third element
1262///      into a <4 x float> that uses insert element.
1263///   2) A fully general blob of memory, which we turn into some (potentially
1264///      large) integer type with extract and insert operations where the loads
1265///      and stores would mutate the memory.
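/// For example (illustrative), a float access at offset 8 of a 16-byte alloca
/// is compatible with an accumulated <4 x float> type, while a float access at
/// offset 6 is not and forces VecTy back to the VoidTy "large integer" marker.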
1266static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
1267                        unsigned AllocaSize, const TargetData &TD,
1268                        LLVMContext *Context) {
1269  // If this could be contributing to a vector, analyze it.
1270  if (VecTy != Type::VoidTy) { // either null or a vector type.
1271
1272    // If the In type is a vector that is the same size as the alloca, see if it
1273    // matches the existing VecTy.
1274    if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
1275      if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
1276        // If we're storing/loading a vector of the right size, allow it as a
1277        // vector.  If this is the first vector we see, remember the type so that
1278        // we know the element size.
1279        if (VecTy == 0)
1280          VecTy = VInTy;
1281        return;
1282      }
1283    } else if (In == Type::FloatTy || In == Type::DoubleTy ||
1284               (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
1285                isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
1286      // If we're accessing something that could be an element of a vector, see
1287      // if the implied vector agrees with what we already have and if Offset is
1288      // compatible with it.
1289      unsigned EltSize = In->getPrimitiveSizeInBits()/8;
1290      if (Offset % EltSize == 0 &&
1291          AllocaSize % EltSize == 0 &&
1292          (VecTy == 0 ||
1293           cast<VectorType>(VecTy)->getElementType()
1294                 ->getPrimitiveSizeInBits()/8 == EltSize)) {
1295        if (VecTy == 0)
1296          VecTy = Context->getVectorType(In, AllocaSize/EltSize);
1297        return;
1298      }
1299    }
1300  }
1301
1302  // Otherwise, we have a case that we can't handle with an optimized vector
1303  // form.  We can still turn this into a large integer.
1304  VecTy = Type::VoidTy;
1305}
1306
1307/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
1308/// its accesses to use a single vector type, return true, and set VecTy to
1309/// the new type.  If we could convert the alloca into a single promotable
1310/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
1311/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
1312/// is the current offset from the base of the alloca being analyzed.
1313///
1314/// If we see at least one access to the value as a vector type, set the
1315/// SawVec flag.
1316///
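/// For illustration (hypothetical IR), all of the following uses are
/// convertible:
///
///   %a = alloca { i32, i32 }
///   %p = bitcast { i32, i32 }* %a to i64*
///   store i64 %x, i64* %p                              ; whole-alloca store
///   %q = getelementptr { i32, i32 }* %a, i32 0, i32 1
///   %v = load i32* %q                                  ; element load at offset 4
///
/// whereas a volatile access, a store of the alloca's address, or a GEP with a
/// non-constant index makes this routine return false.
///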
1317bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
1318                              bool &SawVec, uint64_t Offset,
1319                              unsigned AllocaSize) {
1320  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
1321    Instruction *User = cast<Instruction>(*UI);
1322
1323    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1324      // Don't break volatile loads.
1325      if (LI->isVolatile())
1326        return false;
1327      MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD, Context);
1328      SawVec |= isa<VectorType>(LI->getType());
1329      continue;
1330    }
1331
1332    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1333      // Storing the pointer, not into the value?
1334      if (SI->getOperand(0) == V || SI->isVolatile()) return false;
1335      MergeInType(SI->getOperand(0)->getType(), Offset,
1336                  VecTy, AllocaSize, *TD, Context);
1337      SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
1338      continue;
1339    }
1340
1341    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
1342      if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset,
1343                              AllocaSize))
1344        return false;
1345      IsNotTrivial = true;
1346      continue;
1347    }
1348
1349    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
1350      // If this is a GEP with variable indices, we can't handle it.
1351      if (!GEP->hasAllConstantIndices())
1352        return false;
1353
1354      // Compute the offset that this GEP adds to the pointer.
1355      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
1356      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
1357                                                &Indices[0], Indices.size());
1358      // See if all uses can be converted.
1359      if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset,
1360                              AllocaSize))
1361        return false;
1362      IsNotTrivial = true;
1363      continue;
1364    }
1365
1366    // If this is a constant sized memset of a constant value (e.g. 0) we can
1367    // handle it.
1368    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
1369      // Store of constant value and constant size.
1370      if (isa<ConstantInt>(MSI->getValue()) &&
1371          isa<ConstantInt>(MSI->getLength())) {
1372        IsNotTrivial = true;
1373        continue;
1374      }
1375    }
1376
1377    // If this is a memcpy or memmove into or out of the whole allocation, we
1378    // can handle it like a load or store of the scalar type.
1379    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
1380      if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
1381        if (Len->getZExtValue() == AllocaSize && Offset == 0) {
1382          IsNotTrivial = true;
1383          continue;
1384        }
1385    }
1386
1387    // Ignore dbg info intrinsics.
1388    if (isa<DbgInfoIntrinsic>(User))
1389      continue;
1390
1391    // Otherwise, we cannot handle this!
1392    return false;
1393  }
1394
1395  return true;
1396}
1397
1398
1399/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
1400/// directly.  This happens when we are converting an "integer union" to a
1401/// single integer scalar, or when we are converting a "vector union" to a
1402/// vector with insert/extractelement instructions.
1403///
1404/// Offset is the offset, in bits, of Ptr from the start of the original alloca.
1405/// By the end of this, there should be no uses of Ptr.
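///
/// For illustration (hypothetical IR), if the alloca has been converted to a
/// single i64 and a user loads an i32 at byte offset 4 (Offset == 32), the
/// rewritten code on a little-endian target looks roughly like:
///
///   %tmp  = load i64* %NewAI
///   %shft = lshr i64 %tmp, 32
///   %val  = trunc i64 %shft to i32
///
/// with an analogous zext/shl/and/or sequence emitted for stores.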
1406void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
1407  while (!Ptr->use_empty()) {
1408    Instruction *User = cast<Instruction>(Ptr->use_back());
1409
1410    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
1411      ConvertUsesToScalar(CI, NewAI, Offset);
1412      CI->eraseFromParent();
1413      continue;
1414    }
1415
1416    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
1417      // Compute the offset that this GEP adds to the pointer.
1418      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
1419      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
1420                                                &Indices[0], Indices.size());
1421      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
1422      GEP->eraseFromParent();
1423      continue;
1424    }
1425
1426    IRBuilder<> Builder(User->getParent(), User);
1427
1428    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1429      // The load is a bit extract from NewAI shifted right by Offset bits.
1430      Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
1431      Value *NewLoadVal
1432        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
1433      LI->replaceAllUsesWith(NewLoadVal);
1434      LI->eraseFromParent();
1435      continue;
1436    }
1437
1438    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1439      assert(SI->getOperand(0) != Ptr && "Consistency error!");
1440      Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str());
1441      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
1442                                             Builder);
1443      Builder.CreateStore(New, NewAI);
1444      SI->eraseFromParent();
1445      continue;
1446    }
1447
1448    // If this is a constant sized memset of a constant value (e.g. 0) we can
1449    // transform it into a store of the expanded constant value.
1450    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
1451      assert(MSI->getRawDest() == Ptr && "Consistency error!");
1452      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
1453      if (NumBytes != 0) {
1454        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
1455
1456        // Compute the value replicated the right number of times.
1457        APInt APVal(NumBytes*8, Val);
1458
1459        // Splat the value if non-zero.
1460        if (Val)
1461          for (unsigned i = 1; i != NumBytes; ++i)
1462            APVal |= APVal << 8;
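        // For example, Val == 0xAB with NumBytes == 4 yields APVal == 0xABABABAB,
        // which is then merged into the existing scalar value below.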
1463
1464        Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str());
1465        Value *New = ConvertScalar_InsertValue(Context->getConstantInt(APVal),
1466                                               Old, Offset, Builder);
1467        Builder.CreateStore(New, NewAI);
1468      }
1469      MSI->eraseFromParent();
1470      continue;
1471    }
1472
1473    // If this is a memcpy or memmove into or out of the whole allocation, we
1474    // can handle it like a load or store of the scalar type.
1475    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
1476      assert(Offset == 0 && "must be store to start of alloca");
1477
1478      // If the source and destination both refer to the same alloca, then this
1479      // is a no-op copy-to-self; just delete it.  Otherwise, emit a load and
1480      // store as appropriate.
1481      AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject());
1482
1483      if (MTI->getSource()->getUnderlyingObject() != OrigAI) {
1484        // Dest must be OrigAI, change this to be a load from the original
1485        // pointer (bitcasted), then a store to our new alloca.
1486        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
1487        Value *SrcPtr = MTI->getSource();
1488        SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
1489
1490        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
1491        SrcVal->setAlignment(MTI->getAlignment());
1492        Builder.CreateStore(SrcVal, NewAI);
1493      } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) {
1494        // Src must be OrigAI, change this to be a load from NewAI then a store
1495        // through the original dest pointer (bitcasted).
1496        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
1497        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
1498
1499        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
1500        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
1501        NewStore->setAlignment(MTI->getAlignment());
1502      } else {
1503        // No-op copy-to-self (Src == Dst); there is nothing to emit.
1504      }
1505
1507      MTI->eraseFromParent();
1508      continue;
1509    }
1510
1511    // If the user is a dbg info intrinsic, it is safe to remove it.
1512    if (isa<DbgInfoIntrinsic>(User)) {
1513      User->eraseFromParent();
1514      continue;
1515    }
1516
1517    llvm_unreachable("Unsupported operation!");
1518  }
1519}
1520
1521/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
1522/// or vector value FromVal, extracting the bits from the offset specified by
1523/// Offset.  This returns the value, which is of type ToType.
1524///
1525/// This happens when we are converting an "integer union" to a single
1526/// integer scalar, or when we are converting a "vector union" to a vector with
1527/// insert/extractelement instructions.
1528///
1529/// Offset is an offset from the original alloca, in bits; it is the bit offset
1530/// from which the value is extracted.
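///
/// For illustration (hypothetical IR), extracting the third float element of a
/// <4 x float> value (Offset == 64) produces:
///
///   %elt = extractelement <4 x float> %FromVal, i32 2
///
/// while extracting an i16 at Offset 16 from an i64 value on a little-endian
/// target produces an lshr by 16 followed by a trunc to i16.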
1531Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
1532                                        uint64_t Offset, IRBuilder<> &Builder) {
1533  // If the load is of the whole new alloca, no conversion is needed.
1534  if (FromVal->getType() == ToType && Offset == 0)
1535    return FromVal;
1536
1537  // If the result alloca is a vector type, this is either an element
1538  // access or a bitcast to another vector type of the same size.
1539  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
1540    if (isa<VectorType>(ToType))
1541      return Builder.CreateBitCast(FromVal, ToType, "tmp");
1542
1543    // Otherwise it must be an element access.
1544    unsigned Elt = 0;
1545    if (Offset) {
1546      unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
1547      Elt = Offset/EltSize;
1548      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
1549    }
1550    // Return the element extracted out of it.
1551    Value *V = Builder.CreateExtractElement(FromVal,
1552                                    Context->getConstantInt(Type::Int32Ty,Elt),
1553                                            "tmp");
1554    if (V->getType() != ToType)
1555      V = Builder.CreateBitCast(V, ToType, "tmp");
1556    return V;
1557  }
1558
1559  // If ToType is a first class aggregate, extract out each of the pieces and
1560  // use insertvalue's to form the FCA.
1561  if (const StructType *ST = dyn_cast<StructType>(ToType)) {
1562    const StructLayout &Layout = *TD->getStructLayout(ST);
1563    Value *Res = Context->getUndef(ST);
1564    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
1565      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
1566                                        Offset+Layout.getElementOffsetInBits(i),
1567                                              Builder);
1568      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
1569    }
1570    return Res;
1571  }
1572
1573  if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
1574    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
1575    Value *Res = Context->getUndef(AT);
1576    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
1577      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
1578                                              Offset+i*EltSize, Builder);
1579      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
1580    }
1581    return Res;
1582  }
1583
1584  // Otherwise, this must be a union that was converted to an integer value.
1585  const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
1586
1587  // If this is a big-endian system and the load is narrower than the
1588  // full alloca type, we need to do a shift to get the right bits.
1589  int ShAmt = 0;
1590  if (TD->isBigEndian()) {
1591    // On big-endian machines, the lowest bit is stored at the bit offset
1592    // from the pointer given by getTypeStoreSizeInBits.  This matters for
1593    // integers with a bitwidth that is not a multiple of 8.
1594    ShAmt = TD->getTypeStoreSizeInBits(NTy) -
1595            TD->getTypeStoreSizeInBits(ToType) - Offset;
1596  } else {
1597    ShAmt = Offset;
1598  }
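  // For example (illustrative), loading an i8 at Offset 16 out of an i32: a
  // little-endian target gives ShAmt = 16, while a big-endian target gives
  // ShAmt = 32 - 8 - 16 = 8, because the lowest memory offset holds the most
  // significant bits there.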
1599
1600  // Note: we tolerate negative shift amounts (handled with shl), which would
1601  // otherwise be undefined.  We do this to support (e.g.) loads off the end of
1602  // a structure where only some bits are used.
1603  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
1604    FromVal = Builder.CreateLShr(FromVal,
1605                                 Context->getConstantInt(FromVal->getType(),
1606                                                           ShAmt), "tmp");
1607  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
1608    FromVal = Builder.CreateShl(FromVal,
1609                                Context->getConstantInt(FromVal->getType(),
1610                                                          -ShAmt), "tmp");
1611
1612  // Finally, unconditionally truncate the integer to the right width.
1613  unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);
1614  if (LIBitWidth < NTy->getBitWidth())
1615    FromVal =
1616      Builder.CreateTrunc(FromVal, Context->getIntegerType(LIBitWidth), "tmp");
1617  else if (LIBitWidth > NTy->getBitWidth())
1618    FromVal =
1619       Builder.CreateZExt(FromVal, Context->getIntegerType(LIBitWidth), "tmp");
1620
1621  // If the result is an integer, this is a trunc or bitcast.
1622  if (isa<IntegerType>(ToType)) {
1623    // Already the right integer type; nothing more to do.
1624  } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) {
1625    // Just do a bitcast, we know the sizes match up.
1626    FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
1627  } else {
1628    // Otherwise must be a pointer.
1629    FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
1630  }
1631  assert(FromVal->getType() == ToType && "Didn't convert right?");
1632  return FromVal;
1633}
1634
1635
1636/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
1637/// or vector value "Old" at the offset specified by Offset.
1638///
1639/// This happens when we are converting an "integer union" to a
1640/// single integer scalar, or when we are converting a "vector union" to a
1641/// vector with insert/extractelement instructions.
1642///
1643/// Offset is an offset from the original alloca, in bits; it is the bit offset
1644/// at which the new value is inserted.
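///
/// For illustration (hypothetical IR), inserting a float into the second
/// element of a <4 x float> value (Offset == 32) produces:
///
///   %new = insertelement <4 x float> %Old, float %SV, i32 1
///
/// while inserting an i8 at Offset 8 into an i32 value on a little-endian
/// target zero-extends the byte, shifts it left by 8, masks bits 15..8 out of
/// the old value, and ors the two together.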
1645Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
1646                                       uint64_t Offset, IRBuilder<> &Builder) {
1647
1648  // Convert the stored type to the actual type, shift it left to insert
1649  // then 'or' into place.
1650  const Type *AllocaType = Old->getType();
1651
1652  if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
1653    uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy);
1654    uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType());
1655
1656    // Changing the whole vector with memset or with an access of a different
1657    // vector type?
1658    if (ValSize == VecSize)
1659      return Builder.CreateBitCast(SV, AllocaType, "tmp");
1660
1661    uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
1662
1663    // Must be an element insertion.
1664    unsigned Elt = Offset/EltSize;
1665
1666    if (SV->getType() != VTy->getElementType())
1667      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
1668
1669    SV = Builder.CreateInsertElement(Old, SV,
1670                                   Context->getConstantInt(Type::Int32Ty, Elt),
1671                                     "tmp");
1672    return SV;
1673  }
1674
1675  // If SV is a first-class aggregate value, insert each value recursively.
1676  if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
1677    const StructLayout &Layout = *TD->getStructLayout(ST);
1678    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
1679      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
1680      Old = ConvertScalar_InsertValue(Elt, Old,
1681                                      Offset+Layout.getElementOffsetInBits(i),
1682                                      Builder);
1683    }
1684    return Old;
1685  }
1686
1687  if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
1688    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
1689    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
1690      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
1691      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
1692    }
1693    return Old;
1694  }
1695
1696  // If SV is a float, convert it to the appropriate integer type.
1697  // If it is a pointer, do the same.
1698  unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
1699  unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
1700  unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
1701  unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
1702  if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
1703    SV = Builder.CreateBitCast(SV, Context->getIntegerType(SrcWidth), "tmp");
1704  else if (isa<PointerType>(SV->getType()))
1705    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(), "tmp");
1706
1707  // Zero extend or truncate the value if needed.
1708  if (SV->getType() != AllocaType) {
1709    if (SV->getType()->getPrimitiveSizeInBits() <
1710             AllocaType->getPrimitiveSizeInBits())
1711      SV = Builder.CreateZExt(SV, AllocaType, "tmp");
1712    else {
1713      // Truncation may be needed if storing more than the alloca can hold
1714      // (undefined behavior).
1715      SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
1716      SrcWidth = DestWidth;
1717      SrcStoreWidth = DestStoreWidth;
1718    }
1719  }
1720
1721  // If this is a big-endian system and the store is narrower than the
1722  // full alloca type, we need to do a shift to get the right bits.
1723  int ShAmt = 0;
1724  if (TD->isBigEndian()) {
1725    // On big-endian machines, the lowest bit is stored at the bit offset
1726    // from the pointer given by getTypeStoreSizeInBits.  This matters for
1727    // integers with a bitwidth that is not a multiple of 8.
1728    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
1729  } else {
1730    ShAmt = Offset;
1731  }
1732
1733  // Note: we tolerate negative shift amounts (handled with lshr), which would
1734  // otherwise be undefined.  We do this to support (e.g.) stores off the end
1735  // of a structure where only some bits in the structure are set.
1736  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
1737  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
1738    SV = Builder.CreateShl(SV, Context->getConstantInt(SV->getType(),
1739                           ShAmt), "tmp");
1740    Mask <<= ShAmt;
1741  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
1742    SV = Builder.CreateLShr(SV, Context->getConstantInt(SV->getType(),
1743                            -ShAmt), "tmp");
1744    Mask = Mask.lshr(-ShAmt);
1745  }
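  // For example (illustrative), with SrcWidth == 8, DestWidth == 32 and
  // Offset == 8 on a little-endian target: ShAmt == 8, SV is shifted into bits
  // 15..8, and Mask becomes 0x0000FF00, so only that byte of Old is replaced
  // below.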
1746
1747  // Mask out the bits we are about to insert from the old value, and or
1748  // in the new bits.
1749  if (SrcWidth != DestWidth) {
1750    assert(DestWidth > SrcWidth);
1751    Old = Builder.CreateAnd(Old, Context->getConstantInt(~Mask), "mask");
1752    SV = Builder.CreateOr(Old, SV, "ins");
1753  }
1754  return SV;
1755}
1756
1757
1758
1759/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
1760/// some part of a constant global variable.  This intentionally only accepts
1761/// constant expressions because we can't rewrite arbitrary instructions.
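///
/// For example (hypothetical IR), both the global itself and a constant GEP
/// into it qualify:
///
///   @g = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
///   ... getelementptr ([4 x i32]* @g, i32 0, i32 2) ...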
1762static bool PointsToConstantGlobal(Value *V) {
1763  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
1764    return GV->isConstant();
1765  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
1766    if (CE->getOpcode() == Instruction::BitCast ||
1767        CE->getOpcode() == Instruction::GetElementPtr)
1768      return PointsToConstantGlobal(CE->getOperand(0));
1769  return false;
1770}
1771
1772/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
1773/// pointer to an alloca.  Ignore any reads of the pointer; return false if we
1774/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
1775/// track of whether it moves the pointer (with isOffset), but otherwise traverse
1776/// the uses.  If we see a memcpy/memmove that targets an unoffset pointer to
1777/// the alloca, and if the source pointer is a pointer to a constant global, we
1778/// can optimize this.
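///
/// This typically arises from code like (illustrative C):
///
///   static const struct S init = { 1, 2, 3 };
///   void f() { struct S s = init; read_only_use(&s); }
///
/// where the front-end emits a memcpy from the global for 'init' into the
/// alloca for 's'; if 's' is never otherwise written, its reads can be
/// redirected to that constant global.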
1779static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
1780                                           bool isOffset) {
1781  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
1782    if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
1783      // Ignore non-volatile loads, they are always ok.
1784      if (!LI->isVolatile())
1785        continue;
1786
1787    if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
1788      // If uses of the bitcast are ok, we are ok.
1789      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
1790        return false;
1791      continue;
1792    }
1793    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
1794      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
1795      // doesn't, it does.
1796      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
1797                                         isOffset || !GEP->hasAllZeroIndices()))
1798        return false;
1799      continue;
1800    }
1801
1802    // If this isn't our memcpy/memmove, reject it as something we can't
1803    // handle.
1804    if (!isa<MemTransferInst>(*UI))
1805      return false;
1806
1807    // If we already have seen a copy, reject the second one.
1808    if (TheCopy) return false;
1809
1810    // If the pointer has been offset from the start of the alloca, we can't
1811    // safely handle this.
1812    if (isOffset) return false;
1813
1814    // If the memintrinsic isn't using the alloca as the dest, reject it.
1815    if (UI.getOperandNo() != 1) return false;
1816
1817    MemIntrinsic *MI = cast<MemIntrinsic>(*UI);
1818
1819    // If the source of the memcpy/move is not a constant global, reject it.
1820    if (!PointsToConstantGlobal(MI->getOperand(2)))
1821      return false;
1822
1823    // Otherwise, the transform is safe.  Remember the copy instruction.
1824    TheCopy = MI;
1825  }
1826  return true;
1827}
1828
1829/// isOnlyCopiedFromConstantGlobal - Return the copy instruction if the specified
1830/// alloca is only modified by a copy from a constant global.  If we can prove
1831/// this, any uses of the alloca can be replaced with uses of the global directly.
1832Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocationInst *AI) {
1833  Instruction *TheCopy = 0;
1834  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
1835    return TheCopy;
1836  return 0;
1837}
1838