Scalarizer.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
1//===--- Scalarizer.cpp - Scalarize vector operations ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass converts vector operations into scalar operations, in order
11// to expose optimization opportunities on the individual scalar operations.
12// It is mainly intended for targets that do not have vector units, but it
13// may also be useful for revectorizing code to different vector widths.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/IR/IRBuilder.h"
19#include "llvm/IR/InstVisitor.h"
20#include "llvm/Pass.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Transforms/Scalar.h"
23#include "llvm/Transforms/Utils/BasicBlockUtils.h"
24
25using namespace llvm;
26
27#define DEBUG_TYPE "scalarizer"
28
29namespace {
30// Used to store the scattered form of a vector.
31typedef SmallVector<Value *, 8> ValueVector;
32
33// Used to map a vector Value to its scattered form.  We use std::map
34// because we want iterators to persist across insertion and because the
35// values are relatively large.
36typedef std::map<Value *, ValueVector> ScatterMap;
37
38// Lists Instructions that have been replaced with scalar implementations,
39// along with a pointer to their scattered forms.
40typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList;
41
42// Provides a very limited vector-like interface for lazily accessing one
43// component of a scattered vector or vector pointer.
44class Scatterer {
45public:
46  Scatterer() {}
47
48  // Scatter V into Size components.  If new instructions are needed,
49  // insert them before BBI in BB.  If Cache is nonnull, use it to cache
50  // the results.
51  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
52            ValueVector *cachePtr = nullptr);
53
54  // Return component I, creating a new Value for it if necessary.
55  Value *operator[](unsigned I);
56
57  // Return the number of components.
58  unsigned size() const { return Size; }
59
60private:
61  BasicBlock *BB;
62  BasicBlock::iterator BBI;
63  Value *V;
64  ValueVector *CachePtr;
65  PointerType *PtrTy;
66  ValueVector Tmp;
67  unsigned Size;
68};
69
70// FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
71// called Name that compares X and Y in the same way as FCI.
72struct FCmpSplitter {
73  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
74  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
75                    const Twine &Name) const {
76    return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
77  }
78  FCmpInst &FCI;
79};
80
81// ICmpSpliiter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
82// called Name that compares X and Y in the same way as ICI.
83struct ICmpSplitter {
84  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
85  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
86                    const Twine &Name) const {
87    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
88  }
89  ICmpInst &ICI;
90};
91
92// BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
93// a binary operator like BO called Name with operands X and Y.
94struct BinarySplitter {
95  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
96  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
97                    const Twine &Name) const {
98    return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
99  }
100  BinaryOperator &BO;
101};
102
103// Information about a load or store that we're scalarizing.
104struct VectorLayout {
105  VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {}
106
107  // Return the alignment of element I.
108  uint64_t getElemAlign(unsigned I) {
109    return MinAlign(VecAlign, I * ElemSize);
110  }
111
112  // The type of the vector.
113  VectorType *VecTy;
114
115  // The type of each element.
116  Type *ElemTy;
117
118  // The alignment of the vector.
119  uint64_t VecAlign;
120
121  // The size of each element.
122  uint64_t ElemSize;
123};
124
125class Scalarizer : public FunctionPass,
126                   public InstVisitor<Scalarizer, bool> {
127public:
128  static char ID;
129
130  Scalarizer() :
131    FunctionPass(ID) {
132    initializeScalarizerPass(*PassRegistry::getPassRegistry());
133  }
134
135  bool doInitialization(Module &M) override;
136  bool runOnFunction(Function &F) override;
137
138  // InstVisitor methods.  They return true if the instruction was scalarized,
139  // false if nothing changed.
140  bool visitInstruction(Instruction &) { return false; }
141  bool visitSelectInst(SelectInst &SI);
142  bool visitICmpInst(ICmpInst &);
143  bool visitFCmpInst(FCmpInst &);
144  bool visitBinaryOperator(BinaryOperator &);
145  bool visitGetElementPtrInst(GetElementPtrInst &);
146  bool visitCastInst(CastInst &);
147  bool visitBitCastInst(BitCastInst &);
148  bool visitShuffleVectorInst(ShuffleVectorInst &);
149  bool visitPHINode(PHINode &);
150  bool visitLoadInst(LoadInst &);
151  bool visitStoreInst(StoreInst &);
152
153private:
154  Scatterer scatter(Instruction *, Value *);
155  void gather(Instruction *, const ValueVector &);
156  bool canTransferMetadata(unsigned Kind);
157  void transferMetadata(Instruction *, const ValueVector &);
158  bool getVectorLayout(Type *, unsigned, VectorLayout &);
159  bool finish();
160
161  template<typename T> bool splitBinary(Instruction &, const T &);
162
163  ScatterMap Scattered;
164  GatherList Gathered;
165  unsigned ParallelLoopAccessMDKind;
166  const DataLayout *DL;
167};
168
169char Scalarizer::ID = 0;
170} // end anonymous namespace
171
172// This is disabled by default because having separate loads and stores makes
173// it more likely that the -combiner-alias-analysis limits will be reached.
174static cl::opt<bool> ScalarizeLoadStore
175  ("scalarize-load-store", cl::Hidden, cl::init(false),
176   cl::desc("Allow the scalarizer pass to scalarize loads and store"));
177
178INITIALIZE_PASS(Scalarizer, "scalarizer", "Scalarize vector operations",
179                false, false)
180
181Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
182                     ValueVector *cachePtr)
183  : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
184  Type *Ty = V->getType();
185  PtrTy = dyn_cast<PointerType>(Ty);
186  if (PtrTy)
187    Ty = PtrTy->getElementType();
188  Size = Ty->getVectorNumElements();
189  if (!CachePtr)
190    Tmp.resize(Size, nullptr);
191  else if (CachePtr->empty())
192    CachePtr->resize(Size, nullptr);
193  else
194    assert(Size == CachePtr->size() && "Inconsistent vector sizes");
195}
196
197// Return component I, creating a new Value for it if necessary.
198Value *Scatterer::operator[](unsigned I) {
199  ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
200  // Try to reuse a previous value.
201  if (CV[I])
202    return CV[I];
203  IRBuilder<> Builder(BB, BBI);
204  if (PtrTy) {
205    if (!CV[0]) {
206      Type *Ty =
207        PointerType::get(PtrTy->getElementType()->getVectorElementType(),
208                         PtrTy->getAddressSpace());
209      CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
210    }
211    if (I != 0)
212      CV[I] = Builder.CreateConstGEP1_32(CV[0], I,
213                                         V->getName() + ".i" + Twine(I));
214  } else {
215    // Search through a chain of InsertElementInsts looking for element I.
216    // Record other elements in the cache.  The new V is still suitable
217    // for all uncached indices.
218    for (;;) {
219      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
220      if (!Insert)
221        break;
222      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
223      if (!Idx)
224        break;
225      unsigned J = Idx->getZExtValue();
226      CV[J] = Insert->getOperand(1);
227      V = Insert->getOperand(0);
228      if (I == J)
229        return CV[J];
230    }
231    CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
232                                         V->getName() + ".i" + Twine(I));
233  }
234  return CV[I];
235}
236
237bool Scalarizer::doInitialization(Module &M) {
238  ParallelLoopAccessMDKind =
239    M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
240  return false;
241}
242
243bool Scalarizer::runOnFunction(Function &F) {
244  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
245  DL = DLP ? &DLP->getDataLayout() : nullptr;
246  for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
247    BasicBlock *BB = BBI;
248    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
249      Instruction *I = II;
250      bool Done = visit(I);
251      ++II;
252      if (Done && I->getType()->isVoidTy())
253        I->eraseFromParent();
254    }
255  }
256  return finish();
257}
258
259// Return a scattered form of V that can be accessed by Point.  V must be a
260// vector or a pointer to a vector.
261Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
262  if (Argument *VArg = dyn_cast<Argument>(V)) {
263    // Put the scattered form of arguments in the entry block,
264    // so that it can be used everywhere.
265    Function *F = VArg->getParent();
266    BasicBlock *BB = &F->getEntryBlock();
267    return Scatterer(BB, BB->begin(), V, &Scattered[V]);
268  }
269  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
270    // Put the scattered form of an instruction directly after the
271    // instruction.
272    BasicBlock *BB = VOp->getParent();
273    return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
274                     V, &Scattered[V]);
275  }
276  // In the fallback case, just put the scattered before Point and
277  // keep the result local to Point.
278  return Scatterer(Point->getParent(), Point, V);
279}
280
281// Replace Op with the gathered form of the components in CV.  Defer the
282// deletion of Op and creation of the gathered form to the end of the pass,
283// so that we can avoid creating the gathered form if all uses of Op are
284// replaced with uses of CV.
285void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
286  // Since we're not deleting Op yet, stub out its operands, so that it
287  // doesn't make anything live unnecessarily.
288  for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
289    Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
290
291  transferMetadata(Op, CV);
292
293  // If we already have a scattered form of Op (created from ExtractElements
294  // of Op itself), replace them with the new form.
295  ValueVector &SV = Scattered[Op];
296  if (!SV.empty()) {
297    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
298      Instruction *Old = cast<Instruction>(SV[I]);
299      CV[I]->takeName(Old);
300      Old->replaceAllUsesWith(CV[I]);
301      Old->eraseFromParent();
302    }
303  }
304  SV = CV;
305  Gathered.push_back(GatherList::value_type(Op, &SV));
306}
307
308// Return true if it is safe to transfer the given metadata tag from
309// vector to scalar instructions.
310bool Scalarizer::canTransferMetadata(unsigned Tag) {
311  return (Tag == LLVMContext::MD_tbaa
312          || Tag == LLVMContext::MD_fpmath
313          || Tag == LLVMContext::MD_tbaa_struct
314          || Tag == LLVMContext::MD_invariant_load
315          || Tag == ParallelLoopAccessMDKind);
316}
317
318// Transfer metadata from Op to the instructions in CV if it is known
319// to be safe to do so.
320void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
321  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
322  Op->getAllMetadataOtherThanDebugLoc(MDs);
323  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
324    if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
325      for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
326             MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
327        if (canTransferMetadata(MI->first))
328          New->setMetadata(MI->first, MI->second);
329      New->setDebugLoc(Op->getDebugLoc());
330    }
331  }
332}
333
334// Try to fill in Layout from Ty, returning true on success.  Alignment is
335// the alignment of the vector, or 0 if the ABI default should be used.
336bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
337                                 VectorLayout &Layout) {
338  if (!DL)
339    return false;
340
341  // Make sure we're dealing with a vector.
342  Layout.VecTy = dyn_cast<VectorType>(Ty);
343  if (!Layout.VecTy)
344    return false;
345
346  // Check that we're dealing with full-byte elements.
347  Layout.ElemTy = Layout.VecTy->getElementType();
348  if (DL->getTypeSizeInBits(Layout.ElemTy) !=
349      DL->getTypeStoreSizeInBits(Layout.ElemTy))
350    return false;
351
352  if (Alignment)
353    Layout.VecAlign = Alignment;
354  else
355    Layout.VecAlign = DL->getABITypeAlignment(Layout.VecTy);
356  Layout.ElemSize = DL->getTypeStoreSize(Layout.ElemTy);
357  return true;
358}
359
360// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
361// to create an instruction like I with operands X and Y and name Name.
362template<typename Splitter>
363bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
364  VectorType *VT = dyn_cast<VectorType>(I.getType());
365  if (!VT)
366    return false;
367
368  unsigned NumElems = VT->getNumElements();
369  IRBuilder<> Builder(I.getParent(), &I);
370  Scatterer Op0 = scatter(&I, I.getOperand(0));
371  Scatterer Op1 = scatter(&I, I.getOperand(1));
372  assert(Op0.size() == NumElems && "Mismatched binary operation");
373  assert(Op1.size() == NumElems && "Mismatched binary operation");
374  ValueVector Res;
375  Res.resize(NumElems);
376  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
377    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
378                      I.getName() + ".i" + Twine(Elem));
379  gather(&I, Res);
380  return true;
381}
382
383bool Scalarizer::visitSelectInst(SelectInst &SI) {
384  VectorType *VT = dyn_cast<VectorType>(SI.getType());
385  if (!VT)
386    return false;
387
388  unsigned NumElems = VT->getNumElements();
389  IRBuilder<> Builder(SI.getParent(), &SI);
390  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
391  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
392  assert(Op1.size() == NumElems && "Mismatched select");
393  assert(Op2.size() == NumElems && "Mismatched select");
394  ValueVector Res;
395  Res.resize(NumElems);
396
397  if (SI.getOperand(0)->getType()->isVectorTy()) {
398    Scatterer Op0 = scatter(&SI, SI.getOperand(0));
399    assert(Op0.size() == NumElems && "Mismatched select");
400    for (unsigned I = 0; I < NumElems; ++I)
401      Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
402                                    SI.getName() + ".i" + Twine(I));
403  } else {
404    Value *Op0 = SI.getOperand(0);
405    for (unsigned I = 0; I < NumElems; ++I)
406      Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
407                                    SI.getName() + ".i" + Twine(I));
408  }
409  gather(&SI, Res);
410  return true;
411}
412
413bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
414  return splitBinary(ICI, ICmpSplitter(ICI));
415}
416
417bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
418  return splitBinary(FCI, FCmpSplitter(FCI));
419}
420
421bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
422  return splitBinary(BO, BinarySplitter(BO));
423}
424
425bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
426  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
427  if (!VT)
428    return false;
429
430  IRBuilder<> Builder(GEPI.getParent(), &GEPI);
431  unsigned NumElems = VT->getNumElements();
432  unsigned NumIndices = GEPI.getNumIndices();
433
434  Scatterer Base = scatter(&GEPI, GEPI.getOperand(0));
435
436  SmallVector<Scatterer, 8> Ops;
437  Ops.resize(NumIndices);
438  for (unsigned I = 0; I < NumIndices; ++I)
439    Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1));
440
441  ValueVector Res;
442  Res.resize(NumElems);
443  for (unsigned I = 0; I < NumElems; ++I) {
444    SmallVector<Value *, 8> Indices;
445    Indices.resize(NumIndices);
446    for (unsigned J = 0; J < NumIndices; ++J)
447      Indices[J] = Ops[J][I];
448    Res[I] = Builder.CreateGEP(Base[I], Indices,
449                               GEPI.getName() + ".i" + Twine(I));
450    if (GEPI.isInBounds())
451      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
452        NewGEPI->setIsInBounds();
453  }
454  gather(&GEPI, Res);
455  return true;
456}
457
458bool Scalarizer::visitCastInst(CastInst &CI) {
459  VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
460  if (!VT)
461    return false;
462
463  unsigned NumElems = VT->getNumElements();
464  IRBuilder<> Builder(CI.getParent(), &CI);
465  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
466  assert(Op0.size() == NumElems && "Mismatched cast");
467  ValueVector Res;
468  Res.resize(NumElems);
469  for (unsigned I = 0; I < NumElems; ++I)
470    Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
471                                CI.getName() + ".i" + Twine(I));
472  gather(&CI, Res);
473  return true;
474}
475
476bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
477  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
478  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
479  if (!DstVT || !SrcVT)
480    return false;
481
482  unsigned DstNumElems = DstVT->getNumElements();
483  unsigned SrcNumElems = SrcVT->getNumElements();
484  IRBuilder<> Builder(BCI.getParent(), &BCI);
485  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
486  ValueVector Res;
487  Res.resize(DstNumElems);
488
489  if (DstNumElems == SrcNumElems) {
490    for (unsigned I = 0; I < DstNumElems; ++I)
491      Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
492                                     BCI.getName() + ".i" + Twine(I));
493  } else if (DstNumElems > SrcNumElems) {
494    // <M x t1> -> <N*M x t2>.  Convert each t1 to <N x t2> and copy the
495    // individual elements to the destination.
496    unsigned FanOut = DstNumElems / SrcNumElems;
497    Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
498    unsigned ResI = 0;
499    for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
500      Value *V = Op0[Op0I];
501      Instruction *VI;
502      // Look through any existing bitcasts before converting to <N x t2>.
503      // In the best case, the resulting conversion might be a no-op.
504      while ((VI = dyn_cast<Instruction>(V)) &&
505             VI->getOpcode() == Instruction::BitCast)
506        V = VI->getOperand(0);
507      V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
508      Scatterer Mid = scatter(&BCI, V);
509      for (unsigned MidI = 0; MidI < FanOut; ++MidI)
510        Res[ResI++] = Mid[MidI];
511    }
512  } else {
513    // <N*M x t1> -> <M x t2>.  Convert each group of <N x t1> into a t2.
514    unsigned FanIn = SrcNumElems / DstNumElems;
515    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
516    unsigned Op0I = 0;
517    for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
518      Value *V = UndefValue::get(MidTy);
519      for (unsigned MidI = 0; MidI < FanIn; ++MidI)
520        V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
521                                        BCI.getName() + ".i" + Twine(ResI)
522                                        + ".upto" + Twine(MidI));
523      Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
524                                        BCI.getName() + ".i" + Twine(ResI));
525    }
526  }
527  gather(&BCI, Res);
528  return true;
529}
530
531bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
532  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
533  if (!VT)
534    return false;
535
536  unsigned NumElems = VT->getNumElements();
537  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
538  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
539  ValueVector Res;
540  Res.resize(NumElems);
541
542  for (unsigned I = 0; I < NumElems; ++I) {
543    int Selector = SVI.getMaskValue(I);
544    if (Selector < 0)
545      Res[I] = UndefValue::get(VT->getElementType());
546    else if (unsigned(Selector) < Op0.size())
547      Res[I] = Op0[Selector];
548    else
549      Res[I] = Op1[Selector - Op0.size()];
550  }
551  gather(&SVI, Res);
552  return true;
553}
554
555bool Scalarizer::visitPHINode(PHINode &PHI) {
556  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
557  if (!VT)
558    return false;
559
560  unsigned NumElems = VT->getNumElements();
561  IRBuilder<> Builder(PHI.getParent(), &PHI);
562  ValueVector Res;
563  Res.resize(NumElems);
564
565  unsigned NumOps = PHI.getNumOperands();
566  for (unsigned I = 0; I < NumElems; ++I)
567    Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
568                               PHI.getName() + ".i" + Twine(I));
569
570  for (unsigned I = 0; I < NumOps; ++I) {
571    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
572    BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
573    for (unsigned J = 0; J < NumElems; ++J)
574      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
575  }
576  gather(&PHI, Res);
577  return true;
578}
579
580bool Scalarizer::visitLoadInst(LoadInst &LI) {
581  if (!ScalarizeLoadStore)
582    return false;
583  if (!LI.isSimple())
584    return false;
585
586  VectorLayout Layout;
587  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout))
588    return false;
589
590  unsigned NumElems = Layout.VecTy->getNumElements();
591  IRBuilder<> Builder(LI.getParent(), &LI);
592  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
593  ValueVector Res;
594  Res.resize(NumElems);
595
596  for (unsigned I = 0; I < NumElems; ++I)
597    Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
598                                       LI.getName() + ".i" + Twine(I));
599  gather(&LI, Res);
600  return true;
601}
602
603bool Scalarizer::visitStoreInst(StoreInst &SI) {
604  if (!ScalarizeLoadStore)
605    return false;
606  if (!SI.isSimple())
607    return false;
608
609  VectorLayout Layout;
610  Value *FullValue = SI.getValueOperand();
611  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout))
612    return false;
613
614  unsigned NumElems = Layout.VecTy->getNumElements();
615  IRBuilder<> Builder(SI.getParent(), &SI);
616  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
617  Scatterer Val = scatter(&SI, FullValue);
618
619  ValueVector Stores;
620  Stores.resize(NumElems);
621  for (unsigned I = 0; I < NumElems; ++I) {
622    unsigned Align = Layout.getElemAlign(I);
623    Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
624  }
625  transferMetadata(&SI, Stores);
626  return true;
627}
628
629// Delete the instructions that we scalarized.  If a full vector result
630// is still needed, recreate it using InsertElements.
631bool Scalarizer::finish() {
632  if (Gathered.empty())
633    return false;
634  for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
635       GMI != GME; ++GMI) {
636    Instruction *Op = GMI->first;
637    ValueVector &CV = *GMI->second;
638    if (!Op->use_empty()) {
639      // The value is still needed, so recreate it using a series of
640      // InsertElements.
641      Type *Ty = Op->getType();
642      Value *Res = UndefValue::get(Ty);
643      BasicBlock *BB = Op->getParent();
644      unsigned Count = Ty->getVectorNumElements();
645      IRBuilder<> Builder(BB, Op);
646      if (isa<PHINode>(Op))
647        Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
648      for (unsigned I = 0; I < Count; ++I)
649        Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
650                                          Op->getName() + ".upto" + Twine(I));
651      Res->takeName(Op);
652      Op->replaceAllUsesWith(Res);
653    }
654    Op->eraseFromParent();
655  }
656  Gathered.clear();
657  Scattered.clear();
658  return true;
659}
660
661FunctionPass *llvm::createScalarizerPass() {
662  return new Scalarizer();
663}
664