SLPVectorizer.cpp revision 0c7f116bb6950ef819323d855415b2f2b0aad987
18383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
28383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//
38383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//                     The LLVM Compiler Infrastructure
48383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//
58383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// This file is distributed under the University of Illinois Open Source
68383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// License. See LICENSE.TXT for details.
78383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//
88383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===----------------------------------------------------------------------===//
98383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
108383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// stores that can be put together into vector-stores. Next, it attempts to
118383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// construct vectorizable tree using the use-def chains. If a profitable tree
128383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// was found, the SLP vectorizer performs vectorization on the tree.
138383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//
148383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// The pass is inspired by the work described in the paper:
158383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//  "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
168383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//
178383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===----------------------------------------------------------------------===//
188383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Transforms/Vectorize.h"
193f75c6cfb575917c8c112b2de9593cb860f79e56Nadav Rotem#include "llvm/ADT/MapVector.h"
204c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar#include "llvm/ADT/Optional.h"
216959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem#include "llvm/ADT/PostOrderIterator.h"
2253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include "llvm/ADT/SetVector.h"
2337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/ADT/Statistic.h"
248383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Analysis/AliasAnalysis.h"
25ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines#include "llvm/Analysis/AssumptionCache.h"
2637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Analysis/CodeMetrics.h"
2736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Analysis/LoopInfo.h"
288383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Analysis/ScalarEvolution.h"
2953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include "llvm/Analysis/ScalarEvolutionExpressions.h"
308383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Analysis/TargetTransformInfo.h"
316623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer#include "llvm/Analysis/ValueTracking.h"
328383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/DataLayout.h"
3336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Dominators.h"
3436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/IRBuilder.h"
358383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Instructions.h"
36f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem#include "llvm/IR/IntrinsicInst.h"
378383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Module.h"
38dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/NoFolder.h"
398383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Type.h"
408383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Value.h"
4136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Verifier.h"
428383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Pass.h"
438383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/CommandLine.h"
448383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/Debug.h"
458383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/raw_ostream.h"
46dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/Transforms/Utils/VectorUtils.h"
4753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include <algorithm>
488383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include <map>
4937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include <memory>
508383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
518383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemusing namespace llvm;
528383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
53dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define SV_NAME "slp-vectorizer"
54dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "SLP"
55dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
5637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen HinesSTATISTIC(NumVectorInstructions, "Number of vector instructions generated");
5737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
588383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemstatic cl::opt<int>
590b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem    SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
6008e20fbea18922e9f5f1cfb0ea7931c90006100eNadav Rotem                     cl::desc("Only vectorize if you gain more than this "
6108e20fbea18922e9f5f1cfb0ea7931c90006100eNadav Rotem                              "number "));
62a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
63a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferstatic cl::opt<bool>
64a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold SchwaighoferShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
65a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                   cl::desc("Attempt to vectorize horizontal reductions"));
66a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
679660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighoferstatic cl::opt<bool> ShouldStartVectorizeHorAtStore(
689660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer    "slp-vectorize-hor-store", cl::init(false), cl::Hidden,
699660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer    cl::desc(
709660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer        "Attempt to vectorize horizontal reductions feeding into a store"));
719660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer
728383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemnamespace {
738383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
// Smallest vector register size the pass works with.
// NOTE(review): presumably expressed in bits - confirm against the uses.
static const unsigned MinVecRegSize = 128;

// Upper bound on recursion depth.
// NOTE(review): presumably applies to tree construction - confirm against the
// uses, which are outside this part of the file.
static const unsigned RecursionMaxDepth = 12;

// Cap on the number of alias checks. The value was picked so that it does not
// hurt results on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;

// Second cap for alias checking: the largest distance between two load/store
// instructions for which an alias check is still performed. Mainly matters
// for very large basic blocks.
static const unsigned MaxMemDepDistance = 160;
86ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
87ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \brief Predicate for the element types that the SLP vectorizer supports.
88ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines///
89ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// The most important thing to filter here are types which are invalid in LLVM
90ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// vectors. We also filter target specific types which have absolutely no
91ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// meaningful vectorization path such as x86_fp80 and ppc_f128. This just
92ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// avoids spending time checking the cost model and realizing that they will
93ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// be inevitably scalarized.
94ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic bool isValidElementType(Type *Ty) {
95ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
96ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines         !Ty->isPPC_FP128Ty();
97ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
98ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
99369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns the parent basic block if all of the instructions in \p VL
100369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// are in the same block or null otherwise.
101369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
102369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Instruction *I0 = dyn_cast<Instruction>(VL[0]);
103369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (!I0)
104dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return nullptr;
105369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  BasicBlock *BB = I0->getParent();
106369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (int i = 1, e = VL.size(); i < e; i++) {
107369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Instruction *I = dyn_cast<Instruction>(VL[i]);
108369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (!I)
109dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return nullptr;
110369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
111369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (BB != I->getParent())
112dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return nullptr;
113369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
114369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return BB;
115369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
116369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
117369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns True if all of the values in \p VL are constants.
118369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic bool allConstant(ArrayRef<Value *> VL) {
119369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 0, e = VL.size(); i < e; ++i)
120369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (!isa<Constant>(VL[i]))
121369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return false;
122369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return true;
123369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
124369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
125369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns True if all of the values in \p VL are identical.
126369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic bool isSplat(ArrayRef<Value *> VL) {
127369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 1, e = VL.size(); i < e; ++i)
128369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (VL[i] != VL[0])
129369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return false;
130369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return true;
131369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
132369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
133c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines///\returns Opcode that can be clubbed with \p Op to create an alternate
134c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// sequence which can later be merged as a ShuffleVector instruction.
135c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic unsigned getAltOpcode(unsigned Op) {
136c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  switch (Op) {
137c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case Instruction::FAdd:
138c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return Instruction::FSub;
139c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case Instruction::FSub:
140c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return Instruction::FAdd;
141c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case Instruction::Add:
142c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return Instruction::Sub;
143c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case Instruction::Sub:
144c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return Instruction::Add;
145c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  default:
146c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return 0;
147c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  }
148c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines}
149c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
150c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines///\returns bool representing if Opcode \p Op can be part
151c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// of an alternate sequence which can later be merged as
152c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// a ShuffleVector instruction.
153c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic bool canCombineAsAltInst(unsigned Op) {
154c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  if (Op == Instruction::FAdd || Op == Instruction::FSub ||
155c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Op == Instruction::Sub || Op == Instruction::Add)
156c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return true;
157c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  return false;
158c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines}
159c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
160c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// \returns ShuffleVector instruction if intructions in \p VL have
161c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines///  alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence.
162c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...)
163c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic unsigned isAltInst(ArrayRef<Value *> VL) {
164c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  Instruction *I0 = dyn_cast<Instruction>(VL[0]);
165c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  unsigned Opcode = I0->getOpcode();
166c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  unsigned AltOpcode = getAltOpcode(Opcode);
167c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  for (int i = 1, e = VL.size(); i < e; i++) {
168c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    Instruction *I = dyn_cast<Instruction>(VL[i]);
169c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode))
170c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return 0;
171c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  }
172c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  return Instruction::ShuffleVector;
173c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines}
174c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
175369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns The opcode if all of the Instructions in \p VL have the same
176369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// opcode, or zero.
177369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic unsigned getSameOpcode(ArrayRef<Value *> VL) {
178369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Instruction *I0 = dyn_cast<Instruction>(VL[0]);
179369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (!I0)
180369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return 0;
181369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  unsigned Opcode = I0->getOpcode();
182369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (int i = 1, e = VL.size(); i < e; i++) {
183369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Instruction *I = dyn_cast<Instruction>(VL[i]);
184c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    if (!I || Opcode != I->getOpcode()) {
185c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (canCombineAsAltInst(Opcode) && i == 1)
186c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        return isAltInst(VL);
187369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return 0;
188c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return Opcode;
191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
192369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
19337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Get the intersection (logical and) of all of the potential IR flags
19437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// of each scalar operation (VL) that will be converted into a vector (I).
19537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Flag set: NSW, NUW, exact, and all of fast-math.
19637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
19737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (auto *VecOp = dyn_cast<BinaryOperator>(I)) {
19837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (auto *Intersection = dyn_cast<BinaryOperator>(VL[0])) {
19937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Intersection is initialized to the 0th scalar,
20037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // so start counting from index '1'.
20137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      for (int i = 1, e = VL.size(); i < e; ++i) {
20237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (auto *Scalar = dyn_cast<BinaryOperator>(VL[i]))
20337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          Intersection->andIRFlags(Scalar);
20437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
20537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      VecOp->copyIRFlags(Intersection);
20637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
20737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
20837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
20937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
210fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling/// \returns \p I after propagating metadata from \p VL.
211fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendlingstatic Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
212fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling  Instruction *I0 = cast<Instruction>(VL[0]);
213fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
214fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling  I0->getAllMetadataOtherThanDebugLoc(Metadata);
215fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling
216fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling  for (unsigned i = 0, n = Metadata.size(); i != n; ++i) {
217fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling    unsigned Kind = Metadata[i].first;
218fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling    MDNode *MD = Metadata[i].second;
219fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling
220fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling    for (int i = 1, e = VL.size(); MD && i != e; i++) {
221fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      Instruction *I = cast<Instruction>(VL[i]);
222fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      MDNode *IMD = I->getMetadata(Kind);
223fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling
224fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      switch (Kind) {
225fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      default:
226dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        MD = nullptr; // Remove unknown metadata
227fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling        break;
228fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      case LLVMContext::MD_tbaa:
229fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling        MD = MDNode::getMostGenericTBAA(MD, IMD);
230fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling        break;
23137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case LLVMContext::MD_alias_scope:
232ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        MD = MDNode::getMostGenericAliasScope(MD, IMD);
233ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        break;
23437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case LLVMContext::MD_noalias:
23537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        MD = MDNode::intersect(MD, IMD);
23637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        break;
237fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      case LLVMContext::MD_fpmath:
238fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling        MD = MDNode::getMostGenericFPMath(MD, IMD);
239fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling        break;
240fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      }
241fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling    }
242fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling    I->setMetadata(Kind, MD);
243fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling  }
244fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling  return I;
245fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling}
246fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling
247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns The type that all of the values in \p VL have or null if there
248369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// are different types.
249369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic Type* getSameType(ArrayRef<Value *> VL) {
250369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Type *Ty = VL[0]->getType();
251369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (int i = 1, e = VL.size(); i < e; i++)
25230bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem    if (VL[i]->getType() != Ty)
253dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return nullptr;
254369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
255369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return Ty;
256369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
257369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
258369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns True if the ExtractElement instructions in VL can be vectorized
259369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// to use the original vector.
260369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic bool CanReuseExtract(ArrayRef<Value *> VL) {
261369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  assert(Instruction::ExtractElement == getSameOpcode(VL) && "Invalid opcode");
262369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Check if all of the extracts come from the same vector and from the
263369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // correct offset.
264369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Value *VL0 = VL[0];
265369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  ExtractElementInst *E0 = cast<ExtractElementInst>(VL0);
266369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Value *Vec = E0->getOperand(0);
267369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
268369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // We have to extract from the same vector type.
269369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  unsigned NElts = Vec->getType()->getVectorNumElements();
270369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
271369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (NElts != VL.size())
272369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return false;
273369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
274369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Check that all of the indices extract from the correct offset.
275369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  ConstantInt *CI = dyn_cast<ConstantInt>(E0->getOperand(1));
276369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (!CI || CI->getZExtValue())
277369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return false;
278369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
279369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 1, e = VL.size(); i < e; ++i) {
280369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ExtractElementInst *E = cast<ExtractElementInst>(VL[i]);
281369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1));
282369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
283369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (!CI || CI->getZExtValue() != i || E->getOperand(0) != Vec)
284369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return false;
285369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
286369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
287369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return true;
288369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
289369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
29037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// \returns True if in-tree use also needs extract. This refers to
29137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// possible scalar operand in vectorized instruction.
29237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
29337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                    TargetLibraryInfo *TLI) {
29437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
29537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  unsigned Opcode = UserInst->getOpcode();
29637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  switch (Opcode) {
29737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case Instruction::Load: {
29837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    LoadInst *LI = cast<LoadInst>(UserInst);
29937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return (LI->getPointerOperand() == Scalar);
30037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
30137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case Instruction::Store: {
30237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    StoreInst *SI = cast<StoreInst>(UserInst);
30337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return (SI->getPointerOperand() == Scalar);
30437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
30537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case Instruction::Call: {
30637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    CallInst *CI = cast<CallInst>(UserInst);
30737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
30837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (hasVectorInstrinsicScalarOpd(ID, 1)) {
30937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return (CI->getArgOperand(1) == Scalar);
31037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
31137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
31237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  default:
31337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return false;
31437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
31537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
31637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
317ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \returns the AA location that is being access by the instruction.
318ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) {
319ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (StoreInst *SI = dyn_cast<StoreInst>(I))
320ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return AA->getLocation(SI);
321ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (LoadInst *LI = dyn_cast<LoadInst>(I))
322ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return AA->getLocation(LI);
323ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  return AliasAnalysis::Location();
324ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
325ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
326ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \returns True if the instruction is not a volatile or atomic load/store.
327ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic bool isSimple(Instruction *I) {
328ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (LoadInst *LI = dyn_cast<LoadInst>(I))
329ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return LI->isSimple();
330ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (StoreInst *SI = dyn_cast<StoreInst>(I))
331ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return SI->isSimple();
332ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
333ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return !MI->isVolatile();
334ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  return true;
335ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
336ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
337369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// Bottom Up SLP Vectorizer.
338369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemclass BoUpSLP {
339369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotempublic:
34053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  typedef SmallVector<Value *, 8> ValueList;
34153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  typedef SmallVector<Instruction *, 16> InstrList;
34253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  typedef SmallPtrSet<Value *, 16> ValueSet;
34353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  typedef SmallVector<StoreInst *, 8> StoreList;
34453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
3454c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
3464c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar          TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
3474c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar          DominatorTree *Dt, AssumptionCache *AC)
348ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func),
3494c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
35037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        Builder(Se->getContext()) {
351ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    CodeMetrics::collectEphemeralValues(F, AC, EphValues);
35237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
35353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
35453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// \brief Vectorize the tree that starts with the elements in \p VL.
355a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// Returns the vectorized root.
356a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  Value *vectorizeTree();
35753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
35837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// \returns the cost incurred by unwanted spills and fills, caused by
35937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// holding live values over call sites.
36037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int getSpillCost();
36137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
36253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// \returns the vectorization cost of the subtree that starts at \p VL.
36353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// A negative number means that this is profitable.
364369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  int getTreeCost();
365369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
366dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
367dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// the purpose of scheduling and extraction in the \p UserIgnoreLst.
368dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void buildTree(ArrayRef<Value *> Roots,
369dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                 ArrayRef<Value *> UserIgnoreLst = None);
370369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
371369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// Clear the internal data structures that are created by 'buildTree'.
372369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  void deleteTree() {
373369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    VectorizableTree.clear();
374369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ScalarToTreeEntry.clear();
375369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    MustGather.clear();
376a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    ExternalUses.clear();
37737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    NumLoadsWantToKeepOrder = 0;
37837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    NumLoadsWantToChangeOrder = 0;
37937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    for (auto &Iter : BlocksSchedules) {
38037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      BlockScheduling *BS = Iter.second.get();
38137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      BS->clear();
38237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
383369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
38453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
385369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// \returns true if the memory operations A and B are consecutive.
3864c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);
387369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
388369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// \brief Perform LICM and CSE on the newly generated gather sequences.
389369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  void optimizeGatherSequence();
390c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
  /// \returns true if it is beneficial to reverse the vector order.
39237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  bool shouldReorder() const {
39337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return NumLoadsWantToChangeOrder > NumLoadsWantToKeepOrder;
39437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
39537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
396369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemprivate:
397369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  struct TreeEntry;
39853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
399369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// \returns the cost of the vectorizable entry.
400369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  int getEntryCost(TreeEntry *E);
40153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
402369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// This is the recursive part of buildTree.
403369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth);
40453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
40562657090de3a5731bf644437701ccd78c247119fNadav Rotem  /// Vectorize a single entry in the tree.
406369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Value *vectorizeTree(TreeEntry *E);
407369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
40862657090de3a5731bf644437701ccd78c247119fNadav Rotem  /// Vectorize a single entry in the tree, starting in \p VL.
409369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Value *vectorizeTree(ArrayRef<Value *> VL);
41053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
41162657090de3a5731bf644437701ccd78c247119fNadav Rotem  /// \returns the pointer to the vectorized value if \p VL is already
41262657090de3a5731bf644437701ccd78c247119fNadav Rotem  /// vectorized, or NULL. They may happen in cycles.
4136a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault  Value *alreadyVectorized(ArrayRef<Value *> VL) const;
41462657090de3a5731bf644437701ccd78c247119fNadav Rotem
415369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// \brief Take the pointer operand from the Load/Store instruction.
416369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// \returns NULL if this is not a valid Load/Store instruction.
417369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  static Value *getPointerOperand(Value *I);
41853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
419369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// \brief Take the address space operand from the Load/Store instruction.
420369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// \returns -1 if this is not a valid Load/Store instruction.
421369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  static unsigned getAddressSpaceOperand(Value *I);
42253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
42353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// \returns the scalarization cost for this type. Scalarization in this
42453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// context means the creation of vectors from a group of scalars.
42553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  int getGatherCost(Type *Ty);
42653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
427d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem  /// \returns the scalarization cost for this list of values. Assuming that
428d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem  /// this subtree gets vectorized, we may need to extract the values from the
429d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem  /// roots. This method calculates the cost of extracting the values.
430d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem  int getGatherCost(ArrayRef<Value *> VL);
431d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem
4324b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  /// \brief Set the Builder insert point to one after the last instruction in
4334b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  /// the bundle
4344b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  void setInsertPointAfterBundle(ArrayRef<Value *> VL);
4354b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault
43653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// \returns a vector from a collection of scalars in \p VL.
43753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
43853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
  /// \returns whether the VectorizableTree is fully vectorizable and will
  /// be beneficial even if the tree height is tiny.
44136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool isFullyVectorizableTinyTree();
442d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang
  /// \brief Reorder commutative operands in alt shuffle if they result in
  /// vectorized code.
445ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  void reorderAltShuffleOperands(ArrayRef<Value *> VL,
446ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                 SmallVectorImpl<Value *> &Left,
447ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                 SmallVectorImpl<Value *> &Right);
  /// \brief Reorder commutative operands to get better probability of
  /// generating vectorized code.
450ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
451ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                      SmallVectorImpl<Value *> &Left,
452ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                      SmallVectorImpl<Value *> &Right);
453369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  struct TreeEntry {
45437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    TreeEntry() : Scalars(), VectorizedValue(nullptr),
455369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    NeedToGather(0) {}
45653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
457369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    /// \returns true if the scalars in VL are equal to this entry.
4586a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault    bool isSame(ArrayRef<Value *> VL) const {
459369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      assert(VL.size() == Scalars.size() && "Invalid size");
4606623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer      return std::equal(VL.begin(), VL.end(), Scalars.begin());
461369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
462369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
463369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    /// A vector of scalars.
464369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ValueList Scalars;
465369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
466369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    /// The Scalars are vectorized into this value. It is initialized to Null.
467369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Value *VectorizedValue;
468369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
469369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    /// Do we need to gather this sequence ?
470369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    bool NeedToGather;
471369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  };
47253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
473369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// Create a new VectorizableTree entry.
474369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized) {
475369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    VectorizableTree.push_back(TreeEntry());
476369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    int idx = VectorizableTree.size() - 1;
477369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    TreeEntry *Last = &VectorizableTree[idx];
478369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
479369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Last->NeedToGather = !Vectorized;
480369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (Vectorized) {
481369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (int i = 0, e = VL.size(); i != e; ++i) {
482369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        assert(!ScalarToTreeEntry.count(VL[i]) && "Scalar already in tree!");
483369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ScalarToTreeEntry[VL[i]] = idx;
484369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
485369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    } else {
486369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      MustGather.insert(VL.begin(), VL.end());
487369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
488369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return Last;
489ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem  }
49037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
49153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// -- Vectorization State --
492369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// Holds all of the tree entries.
493369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  std::vector<TreeEntry> VectorizableTree;
49453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
495369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// Maps a specific scalar to its tree entry.
496369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  SmallDenseMap<Value*, int> ScalarToTreeEntry;
49753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
498369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// A list of scalars that we found that we need to keep as scalars.
49953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  ValueSet MustGather;
50053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
501a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  /// This POD struct describes one external user in the vectorized tree.
502a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  struct ExternalUser {
503a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    ExternalUser (Value *S, llvm::User *U, int L) :
504a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      Scalar(S), User(U), Lane(L){};
505a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // Which scalar in our function.
506a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    Value *Scalar;
507a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // Which user that uses the scalar.
508a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    llvm::User *User;
509a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // Which lane does the scalar belong to.
510a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    int Lane;
511a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  };
512a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  typedef SmallVector<ExternalUser, 16> UserList;
513a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
514ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// Checks if two instructions may access the same memory.
515ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  ///
516ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// \p Loc1 is the location of \p Inst1. It is passed explicitly because it
517ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// is invariant in the calling loop.
518ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  bool isAliased(const AliasAnalysis::Location &Loc1, Instruction *Inst1,
519ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                 Instruction *Inst2) {
520ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
521ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // First check if the result is already in the cache.
522ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AliasCacheKey key = std::make_pair(Inst1, Inst2);
523ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Optional<bool> &result = AliasCache[key];
524ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (result.hasValue()) {
525ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      return result.getValue();
526ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
527ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AliasAnalysis::Location Loc2 = getLocation(Inst2, AA);
528ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    bool aliased = true;
529ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (Loc1.Ptr && Loc2.Ptr && isSimple(Inst1) && isSimple(Inst2)) {
530ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      // Do the alias check.
531ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      aliased = AA->alias(Loc1, Loc2);
532ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
533ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // Store the result in the cache.
534ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    result = aliased;
535ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return aliased;
536ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
537ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
538ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  typedef std::pair<Instruction *, Instruction *> AliasCacheKey;
539ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
540ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// Cache for alias results.
541ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// TODO: consider moving this to the AliasAnalysis itself.
542ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  DenseMap<AliasCacheKey, Optional<bool>> AliasCache;
543ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
544ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// Removes an instruction from its block and eventually deletes it.
545ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// It's like Instruction::eraseFromParent() except that the actual deletion
546ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// is delayed until BoUpSLP is destructed.
547ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// This is required to ensure that there are no incorrect collisions in the
548ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// AliasCache, which can happen if a new instruction is allocated at the
549ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// same address as a previously deleted instruction.
550ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  void eraseInstruction(Instruction *I) {
551ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    I->removeFromParent();
552ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    I->dropAllReferences();
553ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    DeletedInstructions.push_back(std::unique_ptr<Instruction>(I));
554ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
555ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
556ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// Temporary store for deleted instructions. Instructions will be deleted
557ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// eventually when the BoUpSLP is destructed.
558ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  SmallVector<std::unique_ptr<Instruction>, 8> DeletedInstructions;
559ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
  /// A list of values that need to be extracted out of the tree.
561a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  /// This list holds pairs of (Internal Scalar : External User).
562a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  UserList ExternalUses;
563a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
56437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Values used only by @llvm.assume calls.
56537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  SmallPtrSet<const Value *, 32> EphValues;
56653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
56753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// Holds all of the instructions that we gathered.
56853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  SetVector<Instruction *> GatherSeq;
569a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling  /// A list of blocks that we are going to CSE.
57036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SetVector<BasicBlock *> CSEBlocks;
57153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
57237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Contains all scheduling relevant data for an instruction.
57337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// A ScheduleData either represents a single instruction or a member of an
57437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// instruction bundle (= a group of instructions which is combined into a
57537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// vector instruction).
57637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  struct ScheduleData {
57737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
57837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // The initial value for the dependency counters. It means that the
57937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // dependencies are not calculated yet.
58037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    enum { InvalidDeps = -1 };
58137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
58237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData()
58337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        : Inst(nullptr), FirstInBundle(nullptr), NextInBundle(nullptr),
58437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          NextLoadStore(nullptr), SchedulingRegionID(0), SchedulingPriority(0),
58537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          Dependencies(InvalidDeps), UnscheduledDeps(InvalidDeps),
58637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          UnscheduledDepsInBundle(InvalidDeps), IsScheduled(false) {}
58737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
58837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void init(int BlockSchedulingRegionID) {
58937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      FirstInBundle = this;
59037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      NextInBundle = nullptr;
59137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      NextLoadStore = nullptr;
59237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      IsScheduled = false;
59337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SchedulingRegionID = BlockSchedulingRegionID;
59437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      UnscheduledDepsInBundle = UnscheduledDeps;
59537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      clearDependencies();
59637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
59737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
59837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Returns true if the dependency information has been calculated.
59937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool hasValidDependencies() const { return Dependencies != InvalidDeps; }
60037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
60137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Returns true for single instructions and for bundle representatives
60237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// (= the head of a bundle).
60337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool isSchedulingEntity() const { return FirstInBundle == this; }
60437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
60537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Returns true if it represents an instruction bundle and not only a
60637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// single instruction.
60737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool isPartOfBundle() const {
60837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return NextInBundle != nullptr || FirstInBundle != this;
60937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
61037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
61137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Returns true if it is ready for scheduling, i.e. it has no more
61237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// unscheduled depending instructions/bundles.
61337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool isReady() const {
61437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      assert(isSchedulingEntity() &&
61537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines             "can't consider non-scheduling entity for ready list");
61637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return UnscheduledDepsInBundle == 0 && !IsScheduled;
61737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
61837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
61937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Modifies the number of unscheduled dependencies, also updating it for
62037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// the whole bundle.
62137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int incrementUnscheduledDeps(int Incr) {
62237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      UnscheduledDeps += Incr;
62337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return FirstInBundle->UnscheduledDepsInBundle += Incr;
62437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
62537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
62637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Sets the number of unscheduled dependencies to the number of
62737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// dependencies.
62837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void resetUnscheduledDeps() {
62937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      incrementUnscheduledDeps(Dependencies - UnscheduledDeps);
63037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
63137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
63237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Clears all dependency information.
63337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void clearDependencies() {
63437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Dependencies = InvalidDeps;
63537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      resetUnscheduledDeps();
63637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      MemoryDependencies.clear();
63737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
63837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
63937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void dump(raw_ostream &os) const {
64037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (!isSchedulingEntity()) {
64137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        os << "/ " << *Inst;
64237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      } else if (NextInBundle) {
64337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        os << '[' << *Inst;
64437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleData *SD = NextInBundle;
64537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        while (SD) {
64637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          os << ';' << *SD->Inst;
64737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          SD = SD->NextInBundle;
64837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
64937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        os << ']';
65037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      } else {
65137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        os << *Inst;
65237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
65337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
65453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
65537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Instruction *Inst;
65637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
65737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Points to the head in an instruction bundle (and always to this for
65837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// single instructions).
65937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *FirstInBundle;
66037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
66137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Single linked list of all instructions in a bundle. Null if it is a
66237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// single instruction.
66337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *NextInBundle;
66437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
66537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Single linked list of all memory instructions (e.g. load, store, call)
66637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// in the block - until the end of the scheduling region.
66737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *NextLoadStore;
66837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
66937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The dependent memory instructions.
67037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// This list is derived on demand in calculateDependencies().
67137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    SmallVector<ScheduleData *, 4> MemoryDependencies;
67237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
67337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// This ScheduleData is in the current scheduling region if this matches
67437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// the current SchedulingRegionID of BlockScheduling.
67537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int SchedulingRegionID;
67637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
67737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Used for getting a "good" final ordering of instructions.
67837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int SchedulingPriority;
67937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
68037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The number of dependencies. Constitutes of the number of users of the
68137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// instruction plus the number of dependent memory instructions (if any).
68237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// This value is calculated on demand.
68337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// If InvalidDeps, the number of dependencies is not calculated yet.
68437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ///
68537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int Dependencies;
68637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
68737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The number of dependencies minus the number of dependencies of scheduled
68837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// instructions. As soon as this is zero, the instruction/bundle gets ready
68937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// for scheduling.
69037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Note that this is negative as long as Dependencies is not calculated.
69137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int UnscheduledDeps;
69237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
69337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for
69437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// single instructions.
69537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int UnscheduledDepsInBundle;
69637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
69737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// True if this instruction is scheduled (or considered as scheduled in the
69837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// dry-run).
69937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool IsScheduled;
70037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  };
70137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
70237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#ifndef NDEBUG
70337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  friend raw_ostream &operator<<(raw_ostream &os,
70437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                 const BoUpSLP::ScheduleData &SD);
70537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#endif
70637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
70737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Contains all scheduling data for a basic block.
70837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ///
70937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  struct BlockScheduling {
71037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
71137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BlockScheduling(BasicBlock *BB)
71237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize),
71337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          ScheduleStart(nullptr), ScheduleEnd(nullptr),
71437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr),
71537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          // Make sure that the initial SchedulingRegionID is greater than the
71637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          // initial SchedulingRegionID in ScheduleData (which is 0).
71737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          SchedulingRegionID(1) {}
71837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
71937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void clear() {
72037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ReadyInsts.clear();
72137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleStart = nullptr;
72237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleEnd = nullptr;
72337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      FirstLoadStoreInRegion = nullptr;
72437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      LastLoadStoreInRegion = nullptr;
72537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
72637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Make a new scheduling region, i.e. all existing ScheduleData is not
72737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // in the new region yet.
72837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++SchedulingRegionID;
72937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
73037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
73137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *getScheduleData(Value *V) {
73237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleData *SD = ScheduleDataMap[V];
73337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (SD && SD->SchedulingRegionID == SchedulingRegionID)
73437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        return SD;
73537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return nullptr;
73637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
73737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
73837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool isInSchedulingRegion(ScheduleData *SD) {
73937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return SD->SchedulingRegionID == SchedulingRegionID;
74037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
74137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
74237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Marks an instruction as scheduled and puts all dependent ready
74337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// instructions into the ready-list.
74437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    template <typename ReadyListType>
74537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void schedule(ScheduleData *SD, ReadyListType &ReadyList) {
74637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SD->IsScheduled = true;
74737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DEBUG(dbgs() << "SLP:   schedule " << *SD << "\n");
74837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
74937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleData *BundleMember = SD;
75037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      while (BundleMember) {
75137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Handle the def-use chain dependencies.
75237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        for (Use &U : BundleMember->Inst->operands()) {
75337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          ScheduleData *OpDef = getScheduleData(U.get());
75437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (OpDef && OpDef->hasValidDependencies() &&
75537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              OpDef->incrementUnscheduledDeps(-1) == 0) {
75637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // There are no more unscheduled dependencies after decrementing,
75737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // so we can put the dependent instruction into the ready list.
75837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ScheduleData *DepBundle = OpDef->FirstInBundle;
75937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            assert(!DepBundle->IsScheduled &&
76037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                   "already scheduled bundle gets ready");
76137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ReadyList.insert(DepBundle);
76237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            DEBUG(dbgs() << "SLP:    gets ready (def): " << *DepBundle << "\n");
76337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
76437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
76537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Handle the memory dependencies.
76637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
76737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (MemoryDepSD->incrementUnscheduledDeps(-1) == 0) {
76837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // There are no more unscheduled dependencies after decrementing,
76937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // so we can put the dependent instruction into the ready list.
77037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ScheduleData *DepBundle = MemoryDepSD->FirstInBundle;
77137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            assert(!DepBundle->IsScheduled &&
77237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                   "already scheduled bundle gets ready");
77337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ReadyList.insert(DepBundle);
77437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            DEBUG(dbgs() << "SLP:    gets ready (mem): " << *DepBundle << "\n");
77537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
77637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
77737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BundleMember = BundleMember->NextInBundle;
77837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
77937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
78037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
78137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Put all instructions into the ReadyList which are ready for scheduling.
78237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    template <typename ReadyListType>
78337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void initialFillReadyList(ReadyListType &ReadyList) {
78437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
78537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleData *SD = getScheduleData(I);
78637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (SD->isSchedulingEntity() && SD->isReady()) {
78737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          ReadyList.insert(SD);
78837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          DEBUG(dbgs() << "SLP:    initially in ready list: " << *I << "\n");
78937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
79037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
79137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
79237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
79337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Checks if a bundle of instructions can be scheduled, i.e. has no
79437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// cyclic dependencies. This is only a dry-run, no instructions are
79537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// actually moved at this stage.
796ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP);
79737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
79837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Un-bundles a group of instructions.
79937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void cancelScheduling(ArrayRef<Value *> VL);
80037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
80137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Extends the scheduling region so that V is inside the region.
80237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void extendSchedulingRegion(Value *V);
80337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
80437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Initialize the ScheduleData structures for new instructions in the
80537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// scheduling region.
80637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void initScheduleData(Instruction *FromI, Instruction *ToI,
80737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                          ScheduleData *PrevLoadStore,
80837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                          ScheduleData *NextLoadStore);
80937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
81037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Updates the dependency information of a bundle and of all instructions/
81137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// bundles which depend on the original bundle.
81237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void calculateDependencies(ScheduleData *SD, bool InsertInReadyList,
813ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                               BoUpSLP *SLP);
81437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
81537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Sets all instruction in the scheduling region to un-scheduled.
81637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void resetSchedule();
81737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
81837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BasicBlock *BB;
81937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
82037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Simple memory allocation for ScheduleData.
82137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    std::vector<std::unique_ptr<ScheduleData[]>> ScheduleDataChunks;
82237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
82337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The size of a ScheduleData array in ScheduleDataChunks.
82437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int ChunkSize;
82537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
82637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The allocator position in the current chunk, which is the last entry
82737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// of ScheduleDataChunks.
82837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int ChunkPos;
82937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
83037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Attaches ScheduleData to Instruction.
83137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Note that the mapping survives during all vectorization iterations, i.e.
83237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// ScheduleData structures are recycled.
83337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DenseMap<Value *, ScheduleData *> ScheduleDataMap;
83437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
83537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    struct ReadyList : SmallVector<ScheduleData *, 8> {
83637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      void insert(ScheduleData *SD) { push_back(SD); }
83737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    };
83837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
83937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The ready-list for scheduling (only used for the dry-run).
84037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ReadyList ReadyInsts;
84137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
84237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The first instruction of the scheduling region.
84337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Instruction *ScheduleStart;
84437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
84537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The first instruction _after_ the scheduling region.
84637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Instruction *ScheduleEnd;
84737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
84837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The first memory accessing instruction in the scheduling region
84937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// (can be null).
85037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *FirstLoadStoreInRegion;
85137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
85237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The last memory accessing instruction in the scheduling region
85337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// (can be null).
85437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *LastLoadStoreInRegion;
85537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
85637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The ID of the scheduling region. For a new vectorization iteration this
85737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// is incremented which "removes" all ScheduleData from the region.
85837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int SchedulingRegionID;
85937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  };
86037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
86137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Attaches the BlockScheduling structures to basic blocks.
862ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  MapVector<BasicBlock *, std::unique_ptr<BlockScheduling>> BlocksSchedules;
86337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
86437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Performs the "real" scheduling. Done before vectorization is actually
86537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// performed in a basic block.
86637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  void scheduleBlock(BlockScheduling *BS);
867dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
868dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// List of users to ignore during scheduling and that don't need extracting.
869dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  ArrayRef<Value *> UserIgnoreList;
870a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
87137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Number of load-bundles, which contain consecutive loads.
87237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int NumLoadsWantToKeepOrder;
87337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
87437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Number of load-bundles of size 2, which are consecutive loads if reversed.
87537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int NumLoadsWantToChangeOrder;
87637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
87753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  // Analysis and block reference.
87853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Function *F;
87953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  ScalarEvolution *SE;
88053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  TargetTransformInfo *TTI;
881dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  TargetLibraryInfo *TLI;
88253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  AliasAnalysis *AA;
88353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  LoopInfo *LI;
884722b0a4d293b16eebaed94ae65d5f11743cbcea5Nadav Rotem  DominatorTree *DT;
88553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// Instruction builder to construct the vectorized tree.
88653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  IRBuilder<> Builder;
88753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem};
88853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
88937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#ifndef NDEBUG
89037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesraw_ostream &operator<<(raw_ostream &os, const BoUpSLP::ScheduleData &SD) {
89137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  SD.dump(os);
89237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return os;
89337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
89437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#endif
89537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
896dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesvoid BoUpSLP::buildTree(ArrayRef<Value *> Roots,
897dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                        ArrayRef<Value *> UserIgnoreLst) {
898369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  deleteTree();
899dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  UserIgnoreList = UserIgnoreLst;
90030bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem  if (!getSameType(Roots))
90130bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem    return;
902369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  buildTree_rec(Roots, 0);
903a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
904a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  // Collect the values that we need to extract from the tree.
905a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  for (int EIdx = 0, EE = VectorizableTree.size(); EIdx < EE; ++EIdx) {
906a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    TreeEntry *Entry = &VectorizableTree[EIdx];
907a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
908a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // For each lane:
909a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
910a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      Value *Scalar = Entry->Scalars[Lane];
911a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
912a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      // No need to handle users of gathered values.
913a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      if (Entry->NeedToGather)
914a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        continue;
915a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
91636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      for (User *U : Scalar->users()) {
91736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
918a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
91936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Instruction *UserInst = dyn_cast<Instruction>(U);
920a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        if (!UserInst)
921a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          continue;
922a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
92337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Skip in-tree scalars that become vectors
92437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (ScalarToTreeEntry.count(U)) {
92537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          int Idx = ScalarToTreeEntry[U];
92637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          TreeEntry *UseEntry = &VectorizableTree[Idx];
92737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          Value *UseScalar = UseEntry->Scalars[0];
92837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          // Some in-tree scalars will remain as scalar in vectorized
92937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          // instructions. If that is the case, the one in Lane 0 will
93037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          // be used.
93137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (UseScalar != U ||
93237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
93337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
93437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                         << ".\n");
93537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
93637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            continue;
93737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
93837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
93937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
940dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // Ignore users in the user ignore list.
941dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) !=
942dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            UserIgnoreList.end())
943a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem          continue;
944a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
94536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " <<
946a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem              Lane << " from " << *Scalar << ".\n");
94736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ExternalUses.push_back(ExternalUser(Scalar, U, Lane));
948a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      }
949a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    }
950a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  }
95153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
95253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
95353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
954369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemvoid BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
955369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  bool SameTy = getSameType(VL); (void)SameTy;
956c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  bool isAltShuffle = false;
957369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  assert(SameTy && "Invalid types!");
95853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
959369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (Depth == RecursionMaxDepth) {
960369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
961369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    newTreeEntry(VL, false);
962369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return;
963369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
96453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
965369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Don't handle vectors.
966369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (VL[0]->getType()->isVectorTy()) {
967369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
968369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    newTreeEntry(VL, false);
969369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return;
970369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
97153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
972369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
973369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (SI->getValueOperand()->getType()->isVectorTy()) {
974369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
975369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, false);
976369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
977369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
978c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  unsigned Opcode = getSameOpcode(VL);
979c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
980c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // Check that this shuffle vector refers to the alternate
981c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // sequence of opcodes.
982c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  if (Opcode == Instruction::ShuffleVector) {
983c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    Instruction *I0 = dyn_cast<Instruction>(VL[0]);
984c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    unsigned Op = I0->getOpcode();
985c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    if (Op != Instruction::ShuffleVector)
986c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      isAltShuffle = true;
987c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  }
98853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
989369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // If all of the operands are identical or constant we have a simple solution.
990c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) {
991369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
992369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    newTreeEntry(VL, false);
993369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return;
994369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
99553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
996369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // We now know that this is a vector of instructions of the same type from
997369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // the same block.
998369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
99937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Don't vectorize ephemeral values.
100037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (unsigned i = 0, e = VL.size(); i != e; ++i) {
100137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (EphValues.count(VL[i])) {
100237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
100337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ") is ephemeral.\n");
100437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      newTreeEntry(VL, false);
100537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return;
100637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
100737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
100837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
1009369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Check if this is a duplicate of another entry.
1010369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (ScalarToTreeEntry.count(VL[0])) {
1011369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    int Idx = ScalarToTreeEntry[VL[0]];
1012369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    TreeEntry *E = &VectorizableTree[Idx];
1013369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    for (unsigned i = 0, e = VL.size(); i != e; ++i) {
1014369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n");
1015369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (E->Scalars[i] != VL[i]) {
1016369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
1017369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        newTreeEntry(VL, false);
1018369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        return;
1019369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1020369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1021369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *VL[0] << ".\n");
1022369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return;
1023369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
102453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1025369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Check that none of the instructions in the bundle are already in the tree.
1026369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 0, e = VL.size(); i != e; ++i) {
1027369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (ScalarToTreeEntry.count(VL[i])) {
1028369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
1029369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem            ") is already in tree.\n");
1030369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, false);
1031369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
1032369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1033369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
103453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1035ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // If any of the scalars is marked as a value that needs to stay scalar then
1036ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // we need to gather the scalars.
1037369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 0, e = VL.size(); i != e; ++i) {
1038ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (MustGather.count(VL[i])) {
1039ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
1040369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, false);
1041369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
1042369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1043369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
104453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1045369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Check that all of the users of the scalars that we want to vectorize are
1046369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // schedulable.
1047369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Instruction *VL0 = cast<Instruction>(VL[0]);
1048369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  BasicBlock *BB = cast<Instruction>(VL0)->getParent();
104953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
105037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (!DT->isReachableFromEntry(BB)) {
105137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Don't go into unreachable blocks. They may contain instructions with
105237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // dependency cycles which confuse the final scheduling.
105337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
105437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    newTreeEntry(VL, false);
105537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return;
105653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
105737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
1058369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Check that every instructions appears once in this bundle.
105953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  for (unsigned i = 0, e = VL.size(); i < e; ++i)
1060369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    for (unsigned j = i+1; j < e; ++j)
1061369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (VL[i] == VL[j]) {
1062369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
1063369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        newTreeEntry(VL, false);
1064369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        return;
1065369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
106653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
106737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  auto &BSRef = BlocksSchedules[BB];
106837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (!BSRef) {
106937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BSRef = llvm::make_unique<BlockScheduling>(BB);
107053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
107137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BlockScheduling &BS = *BSRef.get();
107253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1073ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (!BS.tryScheduleBundle(VL, this)) {
107437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
107537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BS.cancelScheduling(VL);
107637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    newTreeEntry(VL, false);
107737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return;
107853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
107937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
108053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1081369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  switch (Opcode) {
1082369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PHI: {
1083369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      PHINode *PH = dyn_cast<PHINode>(VL0);
10843c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer
10853c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer      // Check for terminator values (e.g. invoke).
10863c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer      for (unsigned j = 0; j < VL.size(); ++j)
10873c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer        for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
108836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          TerminatorInst *Term = dyn_cast<TerminatorInst>(
108936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines              cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i)));
10903c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer          if (Term) {
10913c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer            DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
109237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            BS.cancelScheduling(VL);
10933c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer            newTreeEntry(VL, false);
10943c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer            return;
10953c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer          }
10963c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer        }
10973c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer
1098369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1099369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
1100369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1101369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
1102369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ValueList Operands;
1103369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        // Prepare the operand vector.
1104369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        for (unsigned j = 0; j < VL.size(); ++j)
110536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          Operands.push_back(cast<PHINode>(VL[j])->getIncomingValueForBlock(
110636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines              PH->getIncomingBlock(i)));
1107369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1108369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        buildTree_rec(Operands, Depth + 1);
1109369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1110369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
1111369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1112369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ExtractElement: {
1113369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      bool Reuse = CanReuseExtract(VL);
1114369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (Reuse) {
1115369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        DEBUG(dbgs() << "SLP: Reusing extract sequence.\n");
111637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      } else {
111737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BS.cancelScheduling(VL);
1118369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1119369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, Reuse);
1120369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
1121369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1122369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Load: {
1123369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Check if the loads are consecutive or if we need to swizzle them.
1124fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer      for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
1125fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer        LoadInst *L = cast<LoadInst>(VL[i]);
112637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (!L->isSimple()) {
112737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1128369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          newTreeEntry(VL, false);
112937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
113037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          return;
113137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
11324c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        const DataLayout &DL = F->getParent()->getDataLayout();
11334c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
11344c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
113537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ++NumLoadsWantToChangeOrder;
113637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
113737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
113837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          newTreeEntry(VL, false);
113937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
1140369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          return;
1141369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        }
1142fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer      }
114337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumLoadsWantToKeepOrder;
1144369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1145369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of loads.\n");
1146369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
1147369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1148369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ZExt:
1149369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SExt:
1150369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToUI:
1151369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToSI:
1152369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPExt:
1153369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PtrToInt:
1154369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::IntToPtr:
1155369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SIToFP:
1156369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UIToFP:
1157369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Trunc:
1158369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPTrunc:
1159369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::BitCast: {
1160369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Type *SrcTy = VL0->getOperand(0)->getType();
1161369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0; i < VL.size(); ++i) {
1162369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
1163ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        if (Ty != SrcTy || !isValidElementType(Ty)) {
116437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1165369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          newTreeEntry(VL, false);
1166369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
1167369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          return;
1168369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        }
1169369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1170369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1171369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of casts.\n");
117253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1173369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
1174369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ValueList Operands;
1175369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        // Prepare the operand vector.
1176369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        for (unsigned j = 0; j < VL.size(); ++j)
1177369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
117853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1179369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        buildTree_rec(Operands, Depth+1);
1180369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
118153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      return;
1182369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1183369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ICmp:
1184369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FCmp: {
1185369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Check that all of the compares have the same predicate.
11860c7f116bb6950ef819323d855415b2f2b0aad987Pirama Arumuga Nainar      CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
1187135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem      Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType();
1188369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 1, e = VL.size(); i < e; ++i) {
1189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        CmpInst *Cmp = cast<CmpInst>(VL[i]);
1190135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem        if (Cmp->getPredicate() != P0 ||
1191135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem            Cmp->getOperand(0)->getType() != ComparedTy) {
119237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1193369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          newTreeEntry(VL, false);
1194369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
1195369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          return;
1196369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        }
1197369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
119853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1199369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1200369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of compares.\n");
120153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1202369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
1203369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ValueList Operands;
1204369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        // Prepare the operand vector.
1205369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        for (unsigned j = 0; j < VL.size(); ++j)
1206369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
120753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1208369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        buildTree_rec(Operands, Depth+1);
1209805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem      }
1210369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
121153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1212369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Select:
1213369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Add:
1214369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FAdd:
1215369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Sub:
1216369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FSub:
1217369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Mul:
1218369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FMul:
1219369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UDiv:
1220369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SDiv:
1221369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FDiv:
1222369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::URem:
1223369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SRem:
1224369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FRem:
1225369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Shl:
1226369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::LShr:
1227369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::AShr:
1228369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::And:
1229369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Or:
1230369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Xor: {
1231369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1232369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
1233369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1234af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      // Sort operands of the instructions so that each side is more likely to
1235af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      // have the same opcode.
1236af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
1237af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer        ValueList Left, Right;
1238af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer        reorderInputsAccordingToOpcode(VL, Left, Right);
123937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        buildTree_rec(Left, Depth + 1);
124037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        buildTree_rec(Right, Depth + 1);
1241af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer        return;
1242af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      }
1243af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer
1244369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
1245369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ValueList Operands;
1246369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        // Prepare the operand vector.
1247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        for (unsigned j = 0; j < VL.size(); ++j)
1248369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
1249369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1250369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        buildTree_rec(Operands, Depth+1);
1251369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1252369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
125353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1254c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::GetElementPtr: {
1255c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // We don't combine GEPs with complicated (nested) indexing.
1256c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned j = 0; j < VL.size(); ++j) {
1257c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
1258c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
125937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1260c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          newTreeEntry(VL, false);
1261c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          return;
1262c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        }
1263c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1264c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1265c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // We can't combine several GEPs into one vector if they operate on
1266c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // different types.
1267c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
1268c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned j = 0; j < VL.size(); ++j) {
1269c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
1270c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (Ty0 != CurTy) {
1271c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
127237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1273c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          newTreeEntry(VL, false);
1274c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          return;
1275c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        }
1276c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1277c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1278c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // We don't combine GEPs with non-constant indexes.
1279c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned j = 0; j < VL.size(); ++j) {
1280c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        auto Op = cast<Instruction>(VL[j])->getOperand(1);
1281c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (!isa<ConstantInt>(Op)) {
1282c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          DEBUG(
1283c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines              dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
128437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1285c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          newTreeEntry(VL, false);
1286c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          return;
1287c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        }
1288c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1289c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1290c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      newTreeEntry(VL, true);
1291c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
1292c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned i = 0, e = 2; i < e; ++i) {
1293c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        ValueList Operands;
1294c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // Prepare the operand vector.
1295c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        for (unsigned j = 0; j < VL.size(); ++j)
1296c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
1297c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1298c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        buildTree_rec(Operands, Depth + 1);
1299c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1300c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return;
1301c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
1302369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Store: {
13034c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      const DataLayout &DL = F->getParent()->getDataLayout();
1304369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Check if the stores are consecutive or if we need to swizzle them.
1305369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
13064c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
130737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1308369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          newTreeEntry(VL, false);
130936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
1310369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          return;
1311369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        }
1312805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
1313369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1314369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of stores.\n");
1315805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
1316805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem      ValueList Operands;
1317805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem      for (unsigned j = 0; j < VL.size(); ++j)
1318369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
1319805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
1320369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      buildTree_rec(Operands, Depth + 1);
132153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      return;
132253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
132336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    case Instruction::Call: {
132436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Check if the calls are all to the same vectorizable intrinsic.
1325dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      CallInst *CI = cast<CallInst>(VL[0]);
1326dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // Check if this is an Intrinsic call or something that can be
1327dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // represented by an intrinsic call
1328dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
1329dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (!isTriviallyVectorizable(ID)) {
133037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BS.cancelScheduling(VL);
133136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        newTreeEntry(VL, false);
133236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
133336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        return;
133436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
1335dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Function *Int = CI->getCalledFunction();
1336c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *A1I = nullptr;
1337c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (hasVectorInstrinsicScalarOpd(ID, 1))
1338c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        A1I = CI->getArgOperand(1);
133936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      for (unsigned i = 1, e = VL.size(); i != e; ++i) {
1340dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
1341dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        if (!CI2 || CI2->getCalledFunction() != Int ||
1342dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            getIntrinsicIDForCall(CI2, TLI) != ID) {
134337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
134436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          newTreeEntry(VL, false);
1345dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
134636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                       << "\n");
134736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          return;
134836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
1349c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // ctlz, cttz and powi are special intrinsics whose second argument
1350c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // should be the same in order for them to be vectorized.
1351c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (hasVectorInstrinsicScalarOpd(ID, 1)) {
1352c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          Value *A1J = CI2->getArgOperand(1);
1353c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          if (A1I != A1J) {
135437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            BS.cancelScheduling(VL);
1355c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines            newTreeEntry(VL, false);
1356c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines            DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
1357c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines                         << " argument "<< A1I<<"!=" << A1J
1358c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines                         << "\n");
1359c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines            return;
1360c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          }
1361c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        }
136236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
136336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
136436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      newTreeEntry(VL, true);
1365dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
136636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ValueList Operands;
136736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // Prepare the operand vector.
136836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        for (unsigned j = 0; j < VL.size(); ++j) {
1369dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          CallInst *CI2 = dyn_cast<CallInst>(VL[j]);
1370dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          Operands.push_back(CI2->getArgOperand(i));
137136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
137236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        buildTree_rec(Operands, Depth + 1);
137336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
137436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return;
137536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
1376c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::ShuffleVector: {
1377c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // If this is not an alternate sequence of opcode like add-sub
1378c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // then do not vectorize this instruction.
1379c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (!isAltShuffle) {
138037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BS.cancelScheduling(VL);
1381c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        newTreeEntry(VL, false);
1382c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
1383c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        return;
1384c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1385c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      newTreeEntry(VL, true);
1386c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
1387ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1388ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      // Reorder operands if reordering would enable vectorization.
1389ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (isa<BinaryOperator>(VL0)) {
1390ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        ValueList Left, Right;
1391ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        reorderAltShuffleOperands(VL, Left, Right);
1392ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        buildTree_rec(Left, Depth + 1);
1393ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        buildTree_rec(Right, Depth + 1);
1394ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        return;
1395ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      }
1396ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1397c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
1398c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        ValueList Operands;
1399c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // Prepare the operand vector.
1400c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        for (unsigned j = 0; j < VL.size(); ++j)
1401c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
1402c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1403c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        buildTree_rec(Operands, Depth + 1);
1404c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1405c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return;
1406c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
1407369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    default:
140837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      BS.cancelScheduling(VL);
1409369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, false);
1410369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
1411369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
141253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
141353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
141453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1415369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getEntryCost(TreeEntry *E) {
1416369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  ArrayRef<Value*> VL = E->Scalars;
141753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
141853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Type *ScalarTy = VL[0]->getType();
141953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
142053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    ScalarTy = SI->getValueOperand()->getType();
142125961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
142225961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem
1423369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (E->NeedToGather) {
1424369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (allConstant(VL))
1425369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return 0;
1426369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (isSplat(VL)) {
1427369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
142853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1429369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return getGatherCost(E->Scalars);
143053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
1431c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  unsigned Opcode = getSameOpcode(VL);
1432c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL");
143353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Instruction *VL0 = cast<Instruction>(VL[0]);
143453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  switch (Opcode) {
1435369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PHI: {
143653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      return 0;
143753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1438369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ExtractElement: {
143936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (CanReuseExtract(VL)) {
144036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        int DeadCost = 0;
144136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        for (unsigned i = 0, e = VL.size(); i < e; ++i) {
144236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          ExtractElementInst *E = cast<ExtractElementInst>(VL[i]);
144336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          if (E->hasOneUse())
144436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            // Take credit for instruction that will become dead.
144536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            DeadCost +=
144636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
144736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
144836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        return -DeadCost;
144936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
1450369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return getGatherCost(VecTy);
145125961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem    }
1452369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ZExt:
1453369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SExt:
1454369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToUI:
1455369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToSI:
1456369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPExt:
1457369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PtrToInt:
1458369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::IntToPtr:
1459369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SIToFP:
1460369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UIToFP:
1461369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Trunc:
1462369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPTrunc:
1463369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::BitCast: {
1464369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Type *SrcTy = VL0->getOperand(0)->getType();
1465369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1466369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Calculate the cost of this instruction.
1467369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
1468369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem                                                         VL0->getType(), SrcTy);
1469369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1470369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
1471369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
1472369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return VecCost - ScalarCost;
147353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1474369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FCmp:
1475369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ICmp:
1476369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Select:
1477369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Add:
1478369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FAdd:
1479369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Sub:
1480369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FSub:
1481369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Mul:
1482369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FMul:
1483369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UDiv:
1484369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SDiv:
1485369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FDiv:
1486369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::URem:
1487369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SRem:
1488369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FRem:
1489369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Shl:
1490369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::LShr:
1491369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::AShr:
1492369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::And:
1493369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Or:
1494369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Xor: {
1495369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Calculate the cost of this instruction.
1496369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      int ScalarCost = 0;
1497369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      int VecCost = 0;
1498369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp ||
1499369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          Opcode == Instruction::Select) {
1500369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
1501369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ScalarCost = VecTy->getNumElements() *
1502369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
1503369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
1504369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      } else {
15057e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer        // Certain instructions can be cheaper to vectorize if they have a
15067e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer        // constant second vector operand.
15077e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer        TargetTransformInfo::OperandValueKind Op1VK =
15087e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer            TargetTransformInfo::OK_AnyValue;
15097e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer        TargetTransformInfo::OperandValueKind Op2VK =
15107e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer            TargetTransformInfo::OK_UniformConstantValue;
151137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        TargetTransformInfo::OperandValueProperties Op1VP =
151237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            TargetTransformInfo::OP_None;
151337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        TargetTransformInfo::OperandValueProperties Op2VP =
151437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            TargetTransformInfo::OP_None;
15157e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer
151636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // If all operands are exactly the same ConstantInt then set the
151736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // operand kind to OK_UniformConstantValue.
151836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // If instead not all operands are constants, then set the operand kind
151936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // to OK_AnyValue. If all operands are constants but not the same,
152036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // then set the operand kind to OK_NonUniformConstantValue.
1521dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        ConstantInt *CInt = nullptr;
152236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        for (unsigned i = 0; i < VL.size(); ++i) {
152336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          const Instruction *I = cast<Instruction>(VL[i]);
152436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          if (!isa<ConstantInt>(I->getOperand(1))) {
15257e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer            Op2VK = TargetTransformInfo::OK_AnyValue;
15267e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer            break;
15277e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer          }
152836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          if (i == 0) {
152936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            CInt = cast<ConstantInt>(I->getOperand(1));
153036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            continue;
153136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          }
153236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          if (Op2VK == TargetTransformInfo::OK_UniformConstantValue &&
153336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines              CInt != cast<ConstantInt>(I->getOperand(1)))
153436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
153536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
153637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // FIXME: Currently cost of model modification for division by
153737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // power of 2 is handled only for X86. Add support for other targets.
153837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && CInt &&
153937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            CInt->getValue().isPowerOf2())
154037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          Op2VP = TargetTransformInfo::OP_PowerOf2;
15417e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer
154237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScalarCost = VecTy->getNumElements() *
154337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                     TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK,
154437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                                 Op1VP, Op2VP);
154537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK,
154637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                              Op1VP, Op2VP);
1547369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1548369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return VecCost - ScalarCost;
154953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1550c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::GetElementPtr: {
1551c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      TargetTransformInfo::OperandValueKind Op1VK =
1552c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TargetTransformInfo::OK_AnyValue;
1553c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      TargetTransformInfo::OperandValueKind Op2VK =
1554c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TargetTransformInfo::OK_UniformConstantValue;
1555c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1556c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      int ScalarCost =
1557c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          VecTy->getNumElements() *
1558c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
1559c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      int VecCost =
1560c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
1561c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1562c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return VecCost - ScalarCost;
1563c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
1564369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Load: {
1565369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Cost of wide load - cost of scalar loads.
1566369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      int ScalarLdCost = VecTy->getNumElements() *
1567369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
15684a6b3a9a770ec2064fb5975ff2d57419c1339a21Arnold Schwaighofer      int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
1569369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return VecLdCost - ScalarLdCost;
157053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1571369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Store: {
1572369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // We know that we can merge the stores. Calculate the cost.
1573369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      int ScalarStCost = VecTy->getNumElements() *
1574369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
15754a6b3a9a770ec2064fb5975ff2d57419c1339a21Arnold Schwaighofer      int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
1576369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return VecStCost - ScalarStCost;
157725961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem    }
157836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    case Instruction::Call: {
157936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      CallInst *CI = cast<CallInst>(VL0);
1580dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
158136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
158236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Calculate the cost of the scalar and vector calls.
158336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      SmallVector<Type*, 4> ScalarTys, VecTys;
1584dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) {
158536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ScalarTys.push_back(CI->getArgOperand(op)->getType());
158636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
158736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                         VecTy->getNumElements()));
158836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
158936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
159036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      int ScalarCallCost = VecTy->getNumElements() *
159136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys);
159236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
159336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys);
159436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
159536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
159636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            << " (" << VecCallCost  << "-" <<  ScalarCallCost << ")"
1597dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            << " for " << *CI << "\n");
159836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
159936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return VecCallCost - ScalarCallCost;
160036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
1601c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::ShuffleVector: {
1602c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      TargetTransformInfo::OperandValueKind Op1VK =
1603c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TargetTransformInfo::OK_AnyValue;
1604c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      TargetTransformInfo::OperandValueKind Op2VK =
1605c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TargetTransformInfo::OK_AnyValue;
1606c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      int ScalarCost = 0;
1607c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      int VecCost = 0;
1608c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned i = 0; i < VL.size(); ++i) {
1609c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        Instruction *I = cast<Instruction>(VL[i]);
1610c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (!I)
1611c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          break;
1612c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        ScalarCost +=
1613c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines            TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK);
1614c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1615c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // VecCost is equal to sum of the cost of creating 2 vectors
1616c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // and the cost of creating shuffle.
1617c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Instruction *I0 = cast<Instruction>(VL[0]);
1618c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      VecCost =
1619c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK);
1620c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Instruction *I1 = cast<Instruction>(VL[1]);
1621c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      VecCost +=
1622c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK);
1623c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      VecCost +=
1624c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0);
1625c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return VecCost - ScalarCost;
1626c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
1627369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    default:
1628369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      llvm_unreachable("Unknown instruction");
162953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
1630369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
163125961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem
1632d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiangbool BoUpSLP::isFullyVectorizableTinyTree() {
1633d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  DEBUG(dbgs() << "SLP: Check whether the tree with height " <<
1634d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang        VectorizableTree.size() << " is fully vectorizable .\n");
1635d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang
1636d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  // We only handle trees of height 2.
1637d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  if (VectorizableTree.size() != 2)
1638d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang    return false;
1639d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang
164036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Handle splat stores.
164136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (!VectorizableTree[0].NeedToGather && isSplat(VectorizableTree[1].Scalars))
164236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
164336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
1644d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  // Gathering cost would be too much for tiny trees.
164536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
164636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return false;
1647d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang
164836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return true;
1649d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang}
1650d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang
165137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesint BoUpSLP::getSpillCost() {
165237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Walk from the bottom of the tree to the top, tracking which values are
165337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // live. When we see a call instruction that is not part of our tree,
165437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // query TTI to see if there is a cost to keeping values live over it
165537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // (for example, if spills and fills are required).
165637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  unsigned BundleWidth = VectorizableTree.front().Scalars.size();
165737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int Cost = 0;
165837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
165937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  SmallPtrSet<Instruction*, 4> LiveValues;
166037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  Instruction *PrevInst = nullptr;
166137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
166237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (unsigned N = 0; N < VectorizableTree.size(); ++N) {
166337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Instruction *Inst = dyn_cast<Instruction>(VectorizableTree[N].Scalars[0]);
166437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (!Inst)
166537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      continue;
166637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
166737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (!PrevInst) {
166837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      PrevInst = Inst;
166937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      continue;
167037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
167137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
167237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DEBUG(
167337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      dbgs() << "SLP: #LV: " << LiveValues.size();
167437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      for (auto *X : LiveValues)
167537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        dbgs() << " " << X->getName();
167637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      dbgs() << ", Looking at ";
167737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Inst->dump();
167837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      );
167937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
168037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Update LiveValues.
168137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    LiveValues.erase(PrevInst);
168237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    for (auto &J : PrevInst->operands()) {
168337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J))
168437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        LiveValues.insert(cast<Instruction>(&*J));
168537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
168637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
168737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Now find the sequence of instructions between PrevInst and Inst.
168837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BasicBlock::reverse_iterator InstIt(Inst), PrevInstIt(PrevInst);
168937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    --PrevInstIt;
169037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    while (InstIt != PrevInstIt) {
169137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (PrevInstIt == PrevInst->getParent()->rend()) {
169237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        PrevInstIt = Inst->getParent()->rbegin();
169337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        continue;
169437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
169537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
169637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (isa<CallInst>(&*PrevInstIt) && &*PrevInstIt != PrevInst) {
169737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        SmallVector<Type*, 4> V;
169837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        for (auto *II : LiveValues)
169937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          V.push_back(VectorType::get(II->getType(), BundleWidth));
170037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        Cost += TTI->getCostOfKeepingLiveOverCall(V);
170137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
170237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
170337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++PrevInstIt;
170437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
170537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
170637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    PrevInst = Inst;
170737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
170837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
170937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(dbgs() << "SLP: SpillCost=" << Cost << "\n");
171037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return Cost;
171137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
171237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
1713369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getTreeCost() {
1714369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  int Cost = 0;
1715369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
1716369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        VectorizableTree.size() << ".\n");
1717369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1718d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  // We only vectorize tiny trees if it is fully vectorizable.
1719d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) {
1720ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (VectorizableTree.empty()) {
172167a38a2875f05ea9c219ab73c4398ee675eb4292Nadav Rotem      assert(!ExternalUses.size() && "We should not have any external users");
172267a38a2875f05ea9c219ab73c4398ee675eb4292Nadav Rotem    }
1723085e23841e9c4f4682385fce456704a5f75f9cdcYi Jiang    return INT_MAX;
1724a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  }
1725a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
1726a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  unsigned BundleWidth = VectorizableTree[0].Scalars.size();
1727a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
1728369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 0, e = VectorizableTree.size(); i != e; ++i) {
1729369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    int C = getEntryCost(&VectorizableTree[i]);
1730369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with "
1731369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          << *VectorizableTree[i].Scalars[0] << " .\n");
1732369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Cost += C;
173353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
1734a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
173536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SmallSet<Value *, 16> ExtractCostCalculated;
1736a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  int ExtractCost = 0;
1737a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end();
1738a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem       I != E; ++I) {
173936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // We only add extract cost once for the same scalar.
174037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (!ExtractCostCalculated.insert(I->Scalar).second)
174137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      continue;
174237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
174337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Uses by ephemeral values are free (because the ephemeral value will be
174437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // removed prior to code generation, and so the extraction will be
174537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // removed as well).
174637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (EphValues.count(I->User))
174736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      continue;
1748a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
1749a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth);
1750a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
1751a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem                                           I->Lane);
1752a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  }
1753a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
175437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  Cost += getSpillCost();
175537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
1756a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  DEBUG(dbgs() << "SLP: Total Cost " << Cost + ExtractCost<< ".\n");
1757a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  return  Cost + ExtractCost;
1758369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
175953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1760369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getGatherCost(Type *Ty) {
1761369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  int Cost = 0;
1762369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
1763369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
1764369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return Cost;
1765369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
176653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1767369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
1768369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Find the type of the operands in VL.
1769369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Type *ScalarTy = VL[0]->getType();
1770369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
1771369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ScalarTy = SI->getValueOperand()->getType();
1772369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
1773369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Find the cost of inserting/extracting values from the vector.
1774369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return getGatherCost(VecTy);
177553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
177653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1777369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::getPointerOperand(Value *I) {
1778369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (LoadInst *LI = dyn_cast<LoadInst>(I))
1779369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return LI->getPointerOperand();
1780369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (StoreInst *SI = dyn_cast<StoreInst>(I))
1781369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return SI->getPointerOperand();
1782dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return nullptr;
1783369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
1784ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem
1785369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemunsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
1786369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (LoadInst *L = dyn_cast<LoadInst>(I))
1787369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return L->getPointerAddressSpace();
1788369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (StoreInst *S = dyn_cast<StoreInst>(I))
1789369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return S->getPointerAddressSpace();
1790369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return -1;
1791369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
1792ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem
17934c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainarbool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) {
1794369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Value *PtrA = getPointerOperand(A);
1795369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Value *PtrB = getPointerOperand(B);
1796369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  unsigned ASA = getAddressSpaceOperand(A);
1797369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  unsigned ASB = getAddressSpaceOperand(B);
1798ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem
1799369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Check that the address spaces match and that the pointers are valid.
1800369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (!PtrA || !PtrB || (ASA != ASB))
1801369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return false;
180253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
18033a7997516982117382b9023ea1176fd53caa948dNadav Rotem  // Make sure that A and B are different pointers of the same type.
1804e65b219edbf5d18ed235dc8a5919580f71d2327bNadav Rotem  if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
1805369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return false;
180653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
18074c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
18085b35d4459222f46000194102bf04d5102c6960cdNadav Rotem  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
18094c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
18105b35d4459222f46000194102bf04d5102c6960cdNadav Rotem
1811474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
18124c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
18134c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
18145b35d4459222f46000194102bf04d5102c6960cdNadav Rotem
1815474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth  APInt OffsetDelta = OffsetB - OffsetA;
18165b35d4459222f46000194102bf04d5102c6960cdNadav Rotem
1817474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth  // Check if they are based on the same pointer. That makes the offsets
1818474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth  // sufficient.
1819474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth  if (PtrA == PtrB)
1820474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth    return OffsetDelta == Size;
1821dfacdd04cd2dd3b474fcabc5497255548f5506d5Nadav Rotem
1822474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth  // Compute the necessary base pointer delta to have the necessary final delta
1823474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth  // equal to the size.
1824474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth  APInt BaseDelta = Size - OffsetDelta;
182539f59f4d95de11c3c39bf6753a555ac32cacf7b7Nadav Rotem
1826474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth  // Otherwise compute the distance with SCEV between the base pointers.
1827369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
1828369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
1829474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth  const SCEV *C = SE->getConstant(BaseDelta);
1830a38edf071dbc76b2e0525485ea4c368cee908373Nadav Rotem  const SCEV *X = SE->getAddExpr(PtrSCEVA, C);
1831a38edf071dbc76b2e0525485ea4c368cee908373Nadav Rotem  return X == PtrSCEVB;
1832369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
183353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1834ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Reorder commutative operations in alternate shuffle if the resulting vectors
1835ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// are consecutive loads. This would allow us to vectorize the tree.
1836ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// If we have something like-
1837ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load a[0] - load b[0]
1838ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load b[1] + load a[1]
1839ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load a[2] - load b[2]
1840ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load a[3] + load b[3]
1841ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Reordering the second load b[1]  load a[1] would allow us to vectorize this
1842ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// code.
1843ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesvoid BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
1844ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                        SmallVectorImpl<Value *> &Left,
1845ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                        SmallVectorImpl<Value *> &Right) {
18464c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  const DataLayout &DL = F->getParent()->getDataLayout();
1847ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1848ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // Push left and right operands of binary operation into Left and Right
1849ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
1850ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Left.push_back(cast<Instruction>(VL[i])->getOperand(0));
1851ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Right.push_back(cast<Instruction>(VL[i])->getOperand(1));
1852ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
1853ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1854ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // Reorder if we have a commutative operation and consecutive access
1855ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // are on either side of the alternate instructions.
1856ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  for (unsigned j = 0; j < VL.size() - 1; ++j) {
1857ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
1858ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
1859ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Instruction *VL1 = cast<Instruction>(VL[j]);
1860ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
18614c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
1862ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j], Right[j]);
1863ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
18644c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
1865ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j + 1], Right[j + 1]);
1866ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
1867ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        }
1868ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        // else unchanged
1869ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      }
1870ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
1871ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
1872ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
1873ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Instruction *VL1 = cast<Instruction>(VL[j]);
1874ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
18754c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
1876ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j], Right[j]);
1877ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
18784c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
1879ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j + 1], Right[j + 1]);
1880ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
1881ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        }
1882ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        // else unchanged
1883ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      }
1884ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
1885ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
1886ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
1887ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1888ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesvoid BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
1889ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                             SmallVectorImpl<Value *> &Left,
1890ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                             SmallVectorImpl<Value *> &Right) {
1891ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1892ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  SmallVector<Value *, 16> OrigLeft, OrigRight;
1893ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1894ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  bool AllSameOpcodeLeft = true;
1895ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  bool AllSameOpcodeRight = true;
1896ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  for (unsigned i = 0, e = VL.size(); i != e; ++i) {
1897ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Instruction *I = cast<Instruction>(VL[i]);
1898ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Value *VLeft = I->getOperand(0);
1899ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Value *VRight = I->getOperand(1);
1900ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1901ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    OrigLeft.push_back(VLeft);
1902ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    OrigRight.push_back(VRight);
1903ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1904ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Instruction *ILeft = dyn_cast<Instruction>(VLeft);
1905ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Instruction *IRight = dyn_cast<Instruction>(VRight);
1906ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1907ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // Check whether all operands on one side have the same opcode. In this case
1908ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // we want to preserve the original order and not make things worse by
1909ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // reordering.
1910ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (i && AllSameOpcodeLeft && ILeft) {
1911ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (Instruction *PLeft = dyn_cast<Instruction>(OrigLeft[i - 1])) {
1912ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        if (PLeft->getOpcode() != ILeft->getOpcode())
1913ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          AllSameOpcodeLeft = false;
1914ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      } else
1915ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        AllSameOpcodeLeft = false;
1916ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
1917ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (i && AllSameOpcodeRight && IRight) {
1918ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (Instruction *PRight = dyn_cast<Instruction>(OrigRight[i - 1])) {
1919ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        if (PRight->getOpcode() != IRight->getOpcode())
1920ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          AllSameOpcodeRight = false;
1921ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      } else
1922ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        AllSameOpcodeRight = false;
1923ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
1924ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1925ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // Sort two opcodes. In the code below we try to preserve the ability to use
1926ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // broadcast of values instead of individual inserts.
1927ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // vl1 = load
1928ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // vl2 = phi
1929ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // vr1 = load
1930ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // vr2 = vr2
1931ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    //    = vl1 x vr1
1932ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    //    = vl2 x vr2
1933ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // If we just sorted according to opcode we would leave the first line in
1934ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // tact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
1935ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    //    = vl1 x vr1
1936ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    //    = vr2 x vl2
1937ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // Because vr2 and vr1 are from the same load we loose the opportunity of a
1938ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // broadcast for the packed right side in the backend: we have [vr1, vl2]
1939ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // instead of [vr1, vr2=vr1].
1940ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (ILeft && IRight) {
1941ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (!i && ILeft->getOpcode() > IRight->getOpcode()) {
1942ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Left.push_back(IRight);
1943ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Right.push_back(ILeft);
1944ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      } else if (i && ILeft->getOpcode() > IRight->getOpcode() &&
1945ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                 Right[i - 1] != IRight) {
1946ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        // Try not to destroy a broad cast for no apparent benefit.
1947ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Left.push_back(IRight);
1948ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Right.push_back(ILeft);
1949ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
1950ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                 Right[i - 1] == ILeft) {
1951ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        // Try preserve broadcasts.
1952ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Left.push_back(IRight);
1953ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Right.push_back(ILeft);
1954ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
1955ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                 Left[i - 1] == IRight) {
1956ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        // Try preserve broadcasts.
1957ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Left.push_back(IRight);
1958ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Right.push_back(ILeft);
1959ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      } else {
1960ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Left.push_back(ILeft);
1961ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Right.push_back(IRight);
1962ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      }
1963ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      continue;
1964ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
1965ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // One opcode, put the instruction on the right.
1966ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (ILeft) {
1967ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      Left.push_back(VRight);
1968ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      Right.push_back(ILeft);
1969ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      continue;
1970ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
1971ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Left.push_back(VLeft);
1972ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Right.push_back(VRight);
1973ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
1974ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1975ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  bool LeftBroadcast = isSplat(Left);
1976ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  bool RightBroadcast = isSplat(Right);
1977ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1978ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // If operands end up being broadcast return this operand order.
1979ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (LeftBroadcast || RightBroadcast)
1980ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return;
1981ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1982ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // Don't reorder if the operands where good to begin.
1983ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (AllSameOpcodeRight || AllSameOpcodeLeft) {
1984ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Left = OrigLeft;
1985ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Right = OrigRight;
1986ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
1987ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
19884c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  const DataLayout &DL = F->getParent()->getDataLayout();
19894c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar
1990ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // Finally check if we can get longer vectorizable chain by reordering
1991ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // without breaking the good operand order detected above.
1992ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // E.g. If we have something like-
1993ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // load a[0]  load b[0]
1994ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // load b[1]  load a[1]
1995ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // load a[2]  load b[2]
1996ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // load a[3]  load b[3]
1997ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // Reordering the second load b[1]  load a[1] would allow us to vectorize
1998ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // this code and we still retain AllSameOpcode property.
1999ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // FIXME: This load reordering might break AllSameOpcode in some rare cases
2000ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // such as-
2001ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // add a[0],c[0]  load b[0]
2002ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // add a[1],c[2]  load b[1]
2003ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // b[2]           load b[2]
2004ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // add a[3],c[3]  load b[3]
2005ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  for (unsigned j = 0; j < VL.size() - 1; ++j) {
2006ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
2007ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
20084c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        if (isConsecutiveAccess(L, L1, DL)) {
2009ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j + 1], Right[j + 1]);
2010ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
2011ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        }
2012ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      }
2013ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
2014ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
2015ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
20164c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        if (isConsecutiveAccess(L, L1, DL)) {
2017ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j + 1], Right[j + 1]);
2018ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
2019ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        }
2020ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      }
2021ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
2022ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // else unchanged
2023ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
2024ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
2025ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
20264b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenaultvoid BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
20274b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  Instruction *VL0 = cast<Instruction>(VL[0]);
202837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BasicBlock::iterator NextInst = VL0;
20294b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  ++NextInst;
20304b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  Builder.SetInsertPoint(VL0->getParent(), NextInst);
20314b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
20324b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault}
20334b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault
2034369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
203553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Value *Vec = UndefValue::get(Ty);
203653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  // Generate the 'InsertElement' instruction.
203753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
203853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
2039a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
2040a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      GatherSeq.insert(Insrt);
2041a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling      CSEBlocks.insert(Insrt->getParent());
2042a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2043a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      // Add to our 'need-to-extract' list.
2044a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      if (ScalarToTreeEntry.count(VL[i])) {
2045a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        int Idx = ScalarToTreeEntry[VL[i]];
2046a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        TreeEntry *E = &VectorizableTree[Idx];
2047a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        // Find which lane we need to extract.
2048a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        int FoundLane = -1;
2049a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        for (unsigned Lane = 0, LE = VL.size(); Lane != LE; ++Lane) {
2050a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem          // Is this the lane of the scalar that we are looking for ?
2051a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem          if (E->Scalars[Lane] == VL[i]) {
2052a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem            FoundLane = Lane;
2053a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem            break;
2054a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem          }
2055a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        }
2056a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        assert(FoundLane >= 0 && "Could not find the correct lane");
2057a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        ExternalUses.push_back(ExternalUser(VL[i], Insrt, FoundLane));
2058a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      }
2059a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    }
206053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
206153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
206253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  return Vec;
206353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
206453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
20656a804acc4ae77c014e4ef97c37f8e720ef360394Matt ArsenaultValue *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
20666a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault  SmallDenseMap<Value*, int>::const_iterator Entry
20676a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault    = ScalarToTreeEntry.find(VL[0]);
20686a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault  if (Entry != ScalarToTreeEntry.end()) {
20696a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault    int Idx = Entry->second;
20706a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault    const TreeEntry *En = &VectorizableTree[Idx];
207162657090de3a5731bf644437701ccd78c247119fNadav Rotem    if (En->isSame(VL) && En->VectorizedValue)
207262657090de3a5731bf644437701ccd78c247119fNadav Rotem      return En->VectorizedValue;
207362657090de3a5731bf644437701ccd78c247119fNadav Rotem  }
2074dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return nullptr;
207562657090de3a5731bf644437701ccd78c247119fNadav Rotem}
207662657090de3a5731bf644437701ccd78c247119fNadav Rotem
2077369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
2078369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (ScalarToTreeEntry.count(VL[0])) {
2079369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    int Idx = ScalarToTreeEntry[VL[0]];
2080369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    TreeEntry *E = &VectorizableTree[Idx];
2081369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (E->isSame(VL))
2082369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return vectorizeTree(E);
2083369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
208453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
208553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Type *ScalarTy = VL[0]->getType();
208653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
208753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    ScalarTy = SI->getValueOperand()->getType();
208853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
208953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2090369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return Gather(VL, VecTy);
2091369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
2092369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
2093369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::vectorizeTree(TreeEntry *E) {
2094adb412daa41aef94a9f724dfd1ade9f579bb3a84Benjamin Kramer  IRBuilder<>::InsertPointGuard Guard(Builder);
209553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2096369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (E->VectorizedValue) {
2097369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
2098369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return E->VectorizedValue;
209953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
210053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
21011b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  Instruction *VL0 = cast<Instruction>(E->Scalars[0]);
21021b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  Type *ScalarTy = VL0->getType();
21031b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
2104369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ScalarTy = SI->getValueOperand()->getType();
2105369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
210653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2107369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (E->NeedToGather) {
21084b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault    setInsertPointAfterBundle(E->Scalars);
2109369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return Gather(E->Scalars, VecTy);
2110369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
211137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
21124c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  const DataLayout &DL = F->getParent()->getDataLayout();
2113c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  unsigned Opcode = getSameOpcode(E->Scalars);
2114805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
2115369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  switch (Opcode) {
2116369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PHI: {
2117369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      PHINode *PH = dyn_cast<PHINode>(VL0);
2118d237e834a816399b7e1561dd4db2c501f5095712Justin Bogner      Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
211979c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem      Builder.SetCurrentDebugLocation(PH->getDebugLoc());
2120369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
2121369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = NewPhi;
2122369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
2123353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem      // PHINodes may have multiple entries from the same block. We want to
2124353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem      // visit every block once.
2125353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem      SmallSet<BasicBlock*, 4> VisitedBBs;
2126353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem
2127369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
2128369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ValueList Operands;
2129369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        BasicBlock *IBB = PH->getIncomingBlock(i);
2130369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
213137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (!VisitedBBs.insert(IBB).second) {
2132353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem          NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
2133353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem          continue;
2134353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem        }
2135353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem
2136369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        // Prepare the operand vector.
2137369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        for (unsigned j = 0; j < E->Scalars.size(); ++j)
2138369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          Operands.push_back(cast<PHINode>(E->Scalars[j])->
2139369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem                             getIncomingValueForBlock(IBB));
2140369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
2141369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Builder.SetInsertPoint(IBB->getTerminator());
214279c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem        Builder.SetCurrentDebugLocation(PH->getDebugLoc());
2143369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Value *Vec = vectorizeTree(Operands);
2144369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        NewPhi->addIncoming(Vec, IBB);
2145369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
2146805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
2147369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() &&
2148369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem             "Invalid number of incoming values");
2149369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return NewPhi;
2150805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem    }
2151805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
2152369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ExtractElement: {
2153369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (CanReuseExtract(E->Scalars)) {
2154369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Value *V = VL0->getOperand(0);
2155369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        E->VectorizedValue = V;
2156369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        return V;
2157369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
2158369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return Gather(E->Scalars, VecTy);
215953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
2160369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ZExt:
2161369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SExt:
2162369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToUI:
2163369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToSI:
2164369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPExt:
2165369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PtrToInt:
2166369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::IntToPtr:
2167369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SIToFP:
2168369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UIToFP:
2169369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Trunc:
2170369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPTrunc:
2171369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::BitCast: {
2172369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      ValueList INVL;
2173369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (int i = 0, e = E->Scalars.size(); i < e; ++i)
2174369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        INVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
2175369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
21764b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
217779c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2178369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *InVec = vectorizeTree(INVL);
217962657090de3a5731bf644437701ccd78c247119fNadav Rotem
218062657090de3a5731bf644437701ccd78c247119fNadav Rotem      if (Value *V = alreadyVectorized(E->Scalars))
218162657090de3a5731bf644437701ccd78c247119fNadav Rotem        return V;
218262657090de3a5731bf644437701ccd78c247119fNadav Rotem
2183369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      CastInst *CI = dyn_cast<CastInst>(VL0);
2184369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
2185369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = V;
218637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2187369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return V;
218853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
2189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FCmp:
2190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ICmp: {
2191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      ValueList LHSV, RHSV;
2192369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
2193369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        LHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
2194369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        RHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
2195369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
219653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
21974b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
219879c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2199369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *L = vectorizeTree(LHSV);
2200369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *R = vectorizeTree(RHSV);
220162657090de3a5731bf644437701ccd78c247119fNadav Rotem
220262657090de3a5731bf644437701ccd78c247119fNadav Rotem      if (Value *V = alreadyVectorized(E->Scalars))
220362657090de3a5731bf644437701ccd78c247119fNadav Rotem        return V;
220453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
22050c7f116bb6950ef819323d855415b2f2b0aad987Pirama Arumuga Nainar      CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
220662657090de3a5731bf644437701ccd78c247119fNadav Rotem      Value *V;
2207369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (Opcode == Instruction::FCmp)
2208369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        V = Builder.CreateFCmp(P0, L, R);
2209369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      else
2210369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        V = Builder.CreateICmp(P0, L, R);
221153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2212369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = V;
221337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2214369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return V;
221553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
2216369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Select: {
2217369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      ValueList TrueVec, FalseVec, CondVec;
2218369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
2219369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        CondVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
2220369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        TrueVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
2221369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        FalseVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(2));
2222369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
222353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
22244b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
222579c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2226369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *Cond = vectorizeTree(CondVec);
2227369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *True = vectorizeTree(TrueVec);
2228369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *False = vectorizeTree(FalseVec);
222962657090de3a5731bf644437701ccd78c247119fNadav Rotem
223062657090de3a5731bf644437701ccd78c247119fNadav Rotem      if (Value *V = alreadyVectorized(E->Scalars))
223162657090de3a5731bf644437701ccd78c247119fNadav Rotem        return V;
223257aa3aad33b50583d5a82735777d0f0dc03ff122Matt Arsenault
2233369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *V = Builder.CreateSelect(Cond, True, False);
2234369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = V;
223537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2236369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return V;
223753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
2238369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Add:
2239369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FAdd:
2240369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Sub:
2241369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FSub:
2242369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Mul:
2243369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FMul:
2244369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UDiv:
2245369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SDiv:
2246369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FDiv:
2247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::URem:
2248369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SRem:
2249369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FRem:
2250369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Shl:
2251369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::LShr:
2252369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::AShr:
2253369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::And:
2254369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Or:
2255369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Xor: {
2256369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      ValueList LHSVL, RHSVL;
2257af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
2258af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer        reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
2259af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      else
2260af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer        for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
2261af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer          LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
2262af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer          RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
2263af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer        }
226453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
22654b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
226679c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2267369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *LHS = vectorizeTree(LHSVL);
2268369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *RHS = vectorizeTree(RHSVL);
226953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2270369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (LHS == RHS && isa<Instruction>(LHS)) {
2271369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        assert((VL0->getOperand(0) == VL0->getOperand(1)) && "Invalid order");
2272369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
227353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
227462657090de3a5731bf644437701ccd78c247119fNadav Rotem      if (Value *V = alreadyVectorized(E->Scalars))
227562657090de3a5731bf644437701ccd78c247119fNadav Rotem        return V;
227662657090de3a5731bf644437701ccd78c247119fNadav Rotem
2277369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
2278369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
2279369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = V;
228037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      propagateIRFlags(E->VectorizedValue, E->Scalars);
228137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2282fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling
2283fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      if (Instruction *I = dyn_cast<Instruction>(V))
2284fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling        return propagateMetadata(I, E->Scalars);
2285fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling
2286369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return V;
2287369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
2288369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Load: {
2289369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Loads are inserted at the head of the tree because we don't want to
2290369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // sink them all the way down past store instructions.
22914b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
229279c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2293369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      LoadInst *LI = cast<LoadInst>(VL0);
229437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Type *ScalarLoadTy = LI->getType();
22959e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault      unsigned AS = LI->getPointerAddressSpace();
22969e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault
22979e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault      Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
22989e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault                                            VecTy->getPointerTo(AS));
229937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
230037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // The pointer operand uses an in-tree scalar so we add the new BitCast to
230137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // ExternalUses list to make sure that an extract will be generated in the
230237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // future.
230337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (ScalarToTreeEntry.count(LI->getPointerOperand()))
230437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ExternalUses.push_back(
230537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ExternalUser(LI->getPointerOperand(), cast<User>(VecPtr), 0));
230637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
2307369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      unsigned Alignment = LI->getAlignment();
2308369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      LI = Builder.CreateLoad(VecPtr);
23094c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      if (!Alignment) {
23104c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        Alignment = DL.getABITypeAlignment(ScalarLoadTy);
23114c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      }
2312369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      LI->setAlignment(Alignment);
2313369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = LI;
231437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2315fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      return propagateMetadata(LI, E->Scalars);
2316369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
2317369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Store: {
2318369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      StoreInst *SI = cast<StoreInst>(VL0);
2319369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      unsigned Alignment = SI->getAlignment();
23209e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault      unsigned AS = SI->getPointerAddressSpace();
2321369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
2322369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      ValueList ValueOp;
2323369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (int i = 0, e = E->Scalars.size(); i < e; ++i)
2324369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ValueOp.push_back(cast<StoreInst>(E->Scalars[i])->getValueOperand());
2325369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
23264b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
232779c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2328369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *VecValue = vectorizeTree(ValueOp);
23299e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault      Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
23309e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault                                            VecTy->getPointerTo(AS));
2331369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
233237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
233337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // The pointer operand uses an in-tree scalar so we add the new BitCast to
233437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // ExternalUses list to make sure that an extract will be generated in the
233537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // future.
233637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (ScalarToTreeEntry.count(SI->getPointerOperand()))
233737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ExternalUses.push_back(
233837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0));
233937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
23404c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      if (!Alignment) {
23414c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        Alignment = DL.getABITypeAlignment(SI->getValueOperand()->getType());
23424c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      }
2343369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      S->setAlignment(Alignment);
2344369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = S;
234537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2346fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      return propagateMetadata(S, E->Scalars);
2347369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
2348c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::GetElementPtr: {
2349c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      setInsertPointAfterBundle(E->Scalars);
2350c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2351c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      ValueList Op0VL;
2352c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (int i = 0, e = E->Scalars.size(); i < e; ++i)
2353c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0));
2354c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2355c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *Op0 = vectorizeTree(Op0VL);
2356c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2357c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      std::vector<Value *> OpVecs;
2358c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
2359c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines           ++j) {
2360c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        ValueList OpVL;
2361c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        for (int i = 0, e = E->Scalars.size(); i < e; ++i)
2362c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j));
2363c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2364c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        Value *OpVec = vectorizeTree(OpVL);
2365c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        OpVecs.push_back(OpVec);
2366c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
2367c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
23684c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      Value *V = Builder.CreateGEP(
23694c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar          cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs);
2370c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      E->VectorizedValue = V;
237137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2372c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2373c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (Instruction *I = dyn_cast<Instruction>(V))
2374c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        return propagateMetadata(I, E->Scalars);
2375c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2376c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return V;
2377c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
237836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    case Instruction::Call: {
237936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      CallInst *CI = cast<CallInst>(VL0);
238036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      setInsertPointAfterBundle(E->Scalars);
2381c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Function *FI;
2382c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Intrinsic::ID IID  = Intrinsic::not_intrinsic;
238337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Value *ScalarArg = nullptr;
2384c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (CI && (FI = CI->getCalledFunction())) {
2385c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        IID = (Intrinsic::ID) FI->getIntrinsicID();
2386c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
238736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      std::vector<Value *> OpVecs;
238836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
238936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ValueList OpVL;
2390c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // ctlz,cttz and powi are special intrinsics whose second argument is
2391c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // a scalar. This argument should not be vectorized.
2392c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
2393c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          CallInst *CEI = cast<CallInst>(E->Scalars[0]);
239437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          ScalarArg = CEI->getArgOperand(j);
2395c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          OpVecs.push_back(CEI->getArgOperand(j));
2396c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          continue;
2397c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        }
239836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
239936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          CallInst *CEI = cast<CallInst>(E->Scalars[i]);
240036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          OpVL.push_back(CEI->getArgOperand(j));
240136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
240236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
240336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Value *OpVec = vectorizeTree(OpVL);
240436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
240536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        OpVecs.push_back(OpVec);
240636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
240736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
240836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Module *M = F->getParent();
2409dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
241036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
241136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
241236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Value *V = Builder.CreateCall(CF, OpVecs);
241337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
241437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // The scalar argument uses an in-tree scalar so we add the new vectorized
241537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // call to ExternalUses list to make sure that an extract will be
241637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // generated in the future.
241737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (ScalarArg && ScalarToTreeEntry.count(ScalarArg))
241837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
241937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
242036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      E->VectorizedValue = V;
242137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
242236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return V;
242336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
2424c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::ShuffleVector: {
2425c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      ValueList LHSVL, RHSVL;
2426ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      assert(isa<BinaryOperator>(VL0) && "Invalid Shuffle Vector Operand");
2427ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      reorderAltShuffleOperands(E->Scalars, LHSVL, RHSVL);
2428c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      setInsertPointAfterBundle(E->Scalars);
2429c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2430c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *LHS = vectorizeTree(LHSVL);
2431c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *RHS = vectorizeTree(RHSVL);
2432c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2433c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (Value *V = alreadyVectorized(E->Scalars))
2434c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        return V;
2435c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2436c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // Create a vector of LHS op1 RHS
2437c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      BinaryOperator *BinOp0 = cast<BinaryOperator>(VL0);
2438c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS);
2439c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2440c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // Create a vector of LHS op2 RHS
2441c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Instruction *VL1 = cast<Instruction>(E->Scalars[1]);
2442c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      BinaryOperator *BinOp1 = cast<BinaryOperator>(VL1);
2443c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS);
2444c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
244537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Create shuffle to take alternate operations from the vector.
244637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Also, gather up odd and even scalar ops to propagate IR flags to
244737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // each vector operation.
244837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ValueList OddScalars, EvenScalars;
2449c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      unsigned e = E->Scalars.size();
245037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SmallVector<Constant *, 8> Mask(e);
2451c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned i = 0; i < e; ++i) {
245237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (i & 1) {
2453c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          Mask[i] = Builder.getInt32(e + i);
245437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          OddScalars.push_back(E->Scalars[i]);
245537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        } else {
2456c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          Mask[i] = Builder.getInt32(i);
245737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          EvenScalars.push_back(E->Scalars[i]);
245837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
2459c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
2460c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2461c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *ShuffleMask = ConstantVector::get(Mask);
246237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      propagateIRFlags(V0, EvenScalars);
246337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      propagateIRFlags(V1, OddScalars);
2464c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2465c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2466c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      E->VectorizedValue = V;
246737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2468c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (Instruction *I = dyn_cast<Instruction>(V))
2469c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        return propagateMetadata(I, E->Scalars);
2470c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2471c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return V;
2472c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
2473369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    default:
2474369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    llvm_unreachable("unknown inst");
247553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
2476dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return nullptr;
2477369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
247853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2479a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold SchwaighoferValue *BoUpSLP::vectorizeTree() {
248037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
248137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // All blocks must be scheduled before any instructions are inserted.
248237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (auto &BSIter : BlocksSchedules) {
248337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    scheduleBlock(BSIter.second.get());
248437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
248537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
248629b741942807bc4c0441d98f1330b70446794b88Nadav Rotem  Builder.SetInsertPoint(F->getEntryBlock().begin());
2487369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  vectorizeTree(&VectorizableTree[0]);
248853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2489a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
2490a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2491a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  // Extract all of the elements with the external uses.
2492a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  for (UserList::iterator it = ExternalUses.begin(), e = ExternalUses.end();
2493a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem       it != e; ++it) {
2494a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    Value *Scalar = it->Scalar;
2495a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    llvm::User *User = it->User;
2496523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem
2497523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem    // Skip users that we already RAUW. This happens when one instruction
2498523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem    // has multiple uses of the same value.
249936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (std::find(Scalar->user_begin(), Scalar->user_end(), User) ==
250036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Scalar->user_end())
2501a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      continue;
2502a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar");
2503a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2504a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    int Idx = ScalarToTreeEntry[Scalar];
2505a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    TreeEntry *E = &VectorizableTree[Idx];
2506a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    assert(!E->NeedToGather && "Extracting from a gather list");
2507a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2508a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    Value *Vec = E->VectorizedValue;
2509a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    assert(Vec && "Can't find vectorizable value");
2510a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2511f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem    Value *Lane = Builder.getInt32(it->Lane);
2512a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // Generate extracts for out-of-tree users.
2513a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // Find the insertion point for the extractelement lane.
251436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (isa<Instruction>(Vec)){
2515523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem      if (PHINode *PH = dyn_cast<PHINode>(User)) {
2516523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem        for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
2517523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem          if (PH->getIncomingValue(i) == Scalar) {
2518f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem            Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
2519f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem            Value *Ex = Builder.CreateExtractElement(Vec, Lane);
2520a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling            CSEBlocks.insert(PH->getIncomingBlock(i));
2521f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem            PH->setOperand(i, Ex);
2522523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem          }
2523523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem        }
2524523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem      } else {
2525f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem        Builder.SetInsertPoint(cast<Instruction>(User));
2526f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem        Value *Ex = Builder.CreateExtractElement(Vec, Lane);
2527a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling        CSEBlocks.insert(cast<Instruction>(User)->getParent());
2528f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem        User->replaceUsesOfWith(Scalar, Ex);
2529523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem     }
2530a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    } else {
2531f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem      Builder.SetInsertPoint(F->getEntryBlock().begin());
2532f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem      Value *Ex = Builder.CreateExtractElement(Vec, Lane);
2533a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling      CSEBlocks.insert(&F->getEntryBlock());
2534f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem      User->replaceUsesOfWith(Scalar, Ex);
2535a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    }
2536a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2537a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
2538a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  }
2539a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2540369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // For each vectorized value:
2541369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (int EIdx = 0, EE = VectorizableTree.size(); EIdx < EE; ++EIdx) {
2542369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    TreeEntry *Entry = &VectorizableTree[EIdx];
254353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2544369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    // For each lane:
2545369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
2546369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *Scalar = Entry->Scalars[Lane];
2547369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // No need to handle users of gathered values.
2548369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (Entry->NeedToGather)
2549369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        continue;
255053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2551ace9ed50b549667eff8e19eb76f7714a3a6161aeNadav Rotem      assert(Entry->VectorizedValue && "Can't find vectorizable value");
2552ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem
2553369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Type *Ty = Scalar->getType();
2554369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (!Ty->isVoidTy()) {
255536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#ifndef NDEBUG
255636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        for (User *U : Scalar->users()) {
255736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
2558a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
255936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          assert((ScalarToTreeEntry.count(U) ||
2560dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                  // It is legal to replace users in the ignorelist by undef.
2561dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                  (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) !=
2562dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                   UserIgnoreList.end())) &&
2563369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem                 "Replacing out-of-tree value with undef");
2564369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        }
256536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#endif
2566369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Value *Undef = UndefValue::get(Ty);
2567369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Scalar->replaceAllUsesWith(Undef);
2568369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
2569369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
2570ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      eraseInstruction(cast<Instruction>(Scalar));
2571ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem    }
2572ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem  }
2573ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem
2574c7ffbc019fdc6ae5265f1841eaabae34e301f59bNadav Rotem  Builder.ClearInsertionPoint();
2575a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
2576a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  return VectorizableTree[0].VectorizedValue;
257753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
257853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2579369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemvoid BoUpSLP::optimizeGatherSequence() {
2580369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
2581369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        << " gather sequences instructions.\n");
25826959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem  // LICM InsertElementInst sequences.
258353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  for (SetVector<Instruction *>::iterator it = GatherSeq.begin(),
25846959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem       e = GatherSeq.end(); it != e; ++it) {
25856959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem    InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
258653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
258753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    if (!Insert)
258853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      continue;
258953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
259053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // Check if this block is inside a loop.
25916959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem    Loop *L = LI->getLoopFor(Insert->getParent());
259253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    if (!L)
25936959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem      continue;
259453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
259553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // Check if it has a preheader.
259653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    BasicBlock *PreHeader = L->getLoopPreheader();
259753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    if (!PreHeader)
259829acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem      continue;
259953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
260053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // If the vector or the element that we insert into it are
260153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // instructions that are defined in this basic block then we can't
260253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // hoist this instruction.
260353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
260453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
260553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    if (CurrVec && L->contains(CurrVec))
260653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      continue;
260753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    if (NewElem && L->contains(NewElem))
260853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      continue;
260953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
261053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // We can hoist this instruction. Move it to the pre-header.
26116959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem    Insert->moveBefore(PreHeader->getTerminator());
26126959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem  }
26136959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem
2614dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Make a list of all reachable blocks in our CSE queue.
2615dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  SmallVector<const DomTreeNode *, 8> CSEWorkList;
2616dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  CSEWorkList.reserve(CSEBlocks.size());
2617dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  for (BasicBlock *BB : CSEBlocks)
2618dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (DomTreeNode *N = DT->getNode(BB)) {
2619dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      assert(DT->isReachableFromEntry(N));
2620dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      CSEWorkList.push_back(N);
2621dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    }
2622dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
26230c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer  // Sort blocks by domination. This ensures we visit a block after all blocks
26240c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer  // dominating it are visited.
262536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(),
2626dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                   [this](const DomTreeNode *A, const DomTreeNode *B) {
262736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return DT->properlyDominates(A, B);
262836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  });
26290c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer
26306959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem  // Perform O(N^2) search over the gather sequences and merge identical
26316959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem  // instructions. TODO: We can further optimize this scan if we split the
26326959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem  // instructions into different buckets based on the insert lane.
26330c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer  SmallVector<Instruction *, 16> Visited;
2634dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) {
263536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
26360c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer           "Worklist not sorted properly!");
2637dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    BasicBlock *BB = (*I)->getBlock();
26380c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer    // For all instructions in blocks containing gather sequences:
26390c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
26400c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer      Instruction *In = it++;
2641a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling      if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
26426959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem        continue;
26436959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem
264429acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem      // Check if we can replace this instruction with any of the
264529acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem      // visited instructions.
26460c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer      for (SmallVectorImpl<Instruction *>::iterator v = Visited.begin(),
26470c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer                                                    ve = Visited.end();
26480c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer           v != ve; ++v) {
2649523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem        if (In->isIdenticalTo(*v) &&
2650523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem            DT->dominates((*v)->getParent(), In->getParent())) {
2651523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem          In->replaceAllUsesWith(*v);
2652ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          eraseInstruction(In);
2653dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          In = nullptr;
26546959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem          break;
26556959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem        }
26566959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem      }
26570c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer      if (In) {
26580c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer        assert(std::find(Visited.begin(), Visited.end(), In) == Visited.end());
26590c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer        Visited.push_back(In);
26600c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer      }
26616959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem    }
266253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
2663a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling  CSEBlocks.clear();
2664a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling  GatherSeq.clear();
266553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
266653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
266737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// Groups the instructions to a bundle (which is then a single scheduling entity)
266837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// and schedules instructions until the bundle gets ready.
266937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
2670ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                                 BoUpSLP *SLP) {
267137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (isa<PHINode>(VL[0]))
267237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return true;
267337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
267437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Initialize the instruction bundle.
267537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  Instruction *OldScheduleEnd = ScheduleEnd;
267637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ScheduleData *PrevInBundle = nullptr;
267737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ScheduleData *Bundle = nullptr;
267837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  bool ReSchedule = false;
267937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(dbgs() << "SLP:  bundle: " << *VL[0] << "\n");
268037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (Value *V : VL) {
268137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    extendSchedulingRegion(V);
268237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *BundleMember = getScheduleData(V);
268337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(BundleMember &&
268437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines           "no ScheduleData for bundle member (maybe not in same basic block)");
268537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (BundleMember->IsScheduled) {
268637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // A bundle member was scheduled as single instruction before and now
268737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // needs to be scheduled as part of the bundle. We just get rid of the
268837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // existing schedule.
268937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DEBUG(dbgs() << "SLP:  reset schedule because " << *BundleMember
269037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                   << " was already scheduled\n");
269137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ReSchedule = true;
269237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
269337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(BundleMember->isSchedulingEntity() &&
269437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines           "bundle member already part of other bundle");
269537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (PrevInBundle) {
269637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      PrevInBundle->NextInBundle = BundleMember;
269737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    } else {
269837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Bundle = BundleMember;
269937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
270037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember->UnscheduledDepsInBundle = 0;
270137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;
270237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
270337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Group the instructions to a bundle.
270437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember->FirstInBundle = Bundle;
270537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    PrevInBundle = BundleMember;
270637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
270737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (ScheduleEnd != OldScheduleEnd) {
270837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // The scheduling region got new instructions at the lower end (or it is a
270937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // new region for the first bundle). This makes it necessary to
271037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // recalculate all dependencies.
271137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // It is seldom that this needs to be done a second time after adding the
271237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // initial bundle to the region.
271337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
271437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleData *SD = getScheduleData(I);
271537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SD->clearDependencies();
271637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
271737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ReSchedule = true;
271837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
271937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (ReSchedule) {
272037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    resetSchedule();
272137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    initialFillReadyList(ReadyInsts);
272237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
272337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
272437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block "
272537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines               << BB->getName() << "\n");
272637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
2727ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  calculateDependencies(Bundle, true, SLP);
272837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
272937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Now try to schedule the new bundle. As soon as the bundle is "ready" it
273037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // means that there are no cyclic dependencies and we can schedule it.
273137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Note that's important that we don't "schedule" the bundle yet (see
273237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // cancelScheduling).
273337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  while (!Bundle->isReady() && !ReadyInsts.empty()) {
273437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
273537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *pickedSD = ReadyInsts.back();
273637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ReadyInsts.pop_back();
273737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
273837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (pickedSD->isSchedulingEntity() && pickedSD->isReady()) {
273937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      schedule(pickedSD, ReadyInsts);
274037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
274137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
274237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return Bundle->isReady();
274337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
274437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
274537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
274637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (isa<PHINode>(VL[0]))
274737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return;
274837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
274937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ScheduleData *Bundle = getScheduleData(VL[0]);
275037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(dbgs() << "SLP:  cancel scheduling of " << *Bundle << "\n");
275137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(!Bundle->IsScheduled &&
275237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines         "Can't cancel bundle which is already scheduled");
275337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() &&
275437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines         "tried to unbundle something which is not a bundle");
275537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
275637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Un-bundle: make single instructions out of the bundle.
275737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ScheduleData *BundleMember = Bundle;
275837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  while (BundleMember) {
275937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(BundleMember->FirstInBundle == Bundle && "corrupt bundle links");
276037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember->FirstInBundle = BundleMember;
276137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *Next = BundleMember->NextInBundle;
276237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember->NextInBundle = nullptr;
276337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember->UnscheduledDepsInBundle = BundleMember->UnscheduledDeps;
276437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (BundleMember->UnscheduledDepsInBundle == 0) {
276537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ReadyInsts.insert(BundleMember);
276637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
276737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember = Next;
276837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
276937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
277037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
277137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
277237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (getScheduleData(V))
277337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return;
277437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  Instruction *I = dyn_cast<Instruction>(V);
277537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(I && "bundle member must be an instruction");
277637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
277737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (!ScheduleStart) {
277837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // It's the first instruction in the new region.
277937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    initScheduleData(I, I->getNextNode(), nullptr, nullptr);
278037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleStart = I;
278137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleEnd = I->getNextNode();
278237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
278337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DEBUG(dbgs() << "SLP:  initialize schedule region to " << *I << "\n");
278437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return;
278537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
278637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Search up and down at the same time, because we don't know if the new
278737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // instruction is above or below the existing scheduling region.
278837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BasicBlock::reverse_iterator UpIter(ScheduleStart);
278937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BasicBlock::reverse_iterator UpperEnd = BB->rend();
279037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BasicBlock::iterator DownIter(ScheduleEnd);
279137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BasicBlock::iterator LowerEnd = BB->end();
279237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (;;) {
279337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (UpIter != UpperEnd) {
279437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (&*UpIter == I) {
279537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
279637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleStart = I;
279737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        DEBUG(dbgs() << "SLP:  extend schedule region start to " << *I << "\n");
279837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        return;
279937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
280037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      UpIter++;
280137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
280237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (DownIter != LowerEnd) {
280337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (&*DownIter == I) {
280437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion,
280537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                         nullptr);
280637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleEnd = I->getNextNode();
280737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
280837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        DEBUG(dbgs() << "SLP:  extend schedule region end to " << *I << "\n");
280937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        return;
281037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
281137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DownIter++;
281237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
281337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
281437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines           "instruction not found in block");
281537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
281637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
281737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
281837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
281937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                                Instruction *ToI,
282037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                                ScheduleData *PrevLoadStore,
282137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                                ScheduleData *NextLoadStore) {
282237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ScheduleData *CurrentLoadStore = PrevLoadStore;
282337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) {
282437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *SD = ScheduleDataMap[I];
282537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (!SD) {
282637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Allocate a new ScheduleData for the instruction.
282737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (ChunkPos >= ChunkSize) {
282837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleDataChunks.push_back(
282937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            llvm::make_unique<ScheduleData[]>(ChunkSize));
283037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ChunkPos = 0;
283137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
283237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SD = &(ScheduleDataChunks.back()[ChunkPos++]);
283337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleDataMap[I] = SD;
283437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SD->Inst = I;
283537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
283637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(!isInSchedulingRegion(SD) &&
283737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines           "new ScheduleData already in scheduling region");
283837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    SD->init(SchedulingRegionID);
283937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
284037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (I->mayReadOrWriteMemory()) {
284137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Update the linked list of memory accessing instructions.
284237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (CurrentLoadStore) {
284337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        CurrentLoadStore->NextLoadStore = SD;
284437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      } else {
284537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        FirstLoadStoreInRegion = SD;
284637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
284737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      CurrentLoadStore = SD;
284837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
284937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
285037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (NextLoadStore) {
285137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (CurrentLoadStore)
285237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      CurrentLoadStore->NextLoadStore = NextLoadStore;
285337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  } else {
285437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    LastLoadStoreInRegion = CurrentLoadStore;
285537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
285637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
285737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
285837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
285937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                                     bool InsertInReadyList,
2860ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                                     BoUpSLP *SLP) {
286137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(SD->isSchedulingEntity());
286237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
286337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  SmallVector<ScheduleData *, 10> WorkList;
286437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  WorkList.push_back(SD);
286537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
286637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  while (!WorkList.empty()) {
286737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *SD = WorkList.back();
286837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    WorkList.pop_back();
286937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
287037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *BundleMember = SD;
287137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    while (BundleMember) {
287237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      assert(isInSchedulingRegion(BundleMember));
287337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (!BundleMember->hasValidDependencies()) {
287437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
287537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        DEBUG(dbgs() << "SLP:       update deps of " << *BundleMember << "\n");
287637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BundleMember->Dependencies = 0;
287737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BundleMember->resetUnscheduledDeps();
287837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
287937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Handle def-use chain dependencies.
288037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        for (User *U : BundleMember->Inst->users()) {
288137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (isa<Instruction>(U)) {
288237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ScheduleData *UseSD = getScheduleData(U);
288337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
288437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              BundleMember->Dependencies++;
288537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              ScheduleData *DestBundle = UseSD->FirstInBundle;
288637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              if (!DestBundle->IsScheduled) {
288737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                BundleMember->incrementUnscheduledDeps(1);
288837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              }
288937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              if (!DestBundle->hasValidDependencies()) {
289037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                WorkList.push_back(DestBundle);
289137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              }
289237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            }
289337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          } else {
289437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // I'm not sure if this can ever happen. But we need to be safe.
289537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // This lets the instruction/bundle never be scheduled and eventally
289637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // disable vectorization.
289737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            BundleMember->Dependencies++;
289837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            BundleMember->incrementUnscheduledDeps(1);
289937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
290037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
290137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
290237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Handle the memory dependencies.
290337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleData *DepDest = BundleMember->NextLoadStore;
290437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (DepDest) {
2905ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          Instruction *SrcInst = BundleMember->Inst;
2906ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          AliasAnalysis::Location SrcLoc = getLocation(SrcInst, SLP->AA);
290737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
2908ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          unsigned numAliased = 0;
2909ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          unsigned DistToSrc = 1;
291037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
291137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          while (DepDest) {
291237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            assert(isInSchedulingRegion(DepDest));
2913ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
2914ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // We have two limits to reduce the complexity:
2915ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // 1) AliasedCheckLimit: It's a small limit to reduce calls to
2916ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //    SLP->isAliased (which is the expensive part in this loop).
2917ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // 2) MaxMemDepDistance: It's for very large blocks and it aborts
2918ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //    the whole loop (even if the loop is fast, it's quadratic).
2919ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //    It's important for the loop break condition (see below) to
2920ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //    check this limit even between two read-only instructions.
2921ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            if (DistToSrc >= MaxMemDepDistance ||
2922ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                    ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
2923ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                     (numAliased >= AliasedCheckLimit ||
2924ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                      SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {
2925ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
2926ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              // We increment the counter only if the locations are aliased
2927ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              // (instead of counting all alias checks). This gives a better
2928ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              // balance between reduced runtime and accurate dependencies.
2929ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              numAliased++;
2930ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
2931ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              DepDest->MemoryDependencies.push_back(BundleMember);
2932ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              BundleMember->Dependencies++;
2933ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              ScheduleData *DestBundle = DepDest->FirstInBundle;
2934ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              if (!DestBundle->IsScheduled) {
2935ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                BundleMember->incrementUnscheduledDeps(1);
2936ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              }
2937ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              if (!DestBundle->hasValidDependencies()) {
2938ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                WorkList.push_back(DestBundle);
293937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              }
294037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            }
294137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            DepDest = DepDest->NextLoadStore;
2942ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
2943ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // Example, explaining the loop break condition: Let's assume our
2944ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // starting instruction is i0 and MaxMemDepDistance = 3.
2945ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //
2946ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //                      +--------v--v--v
2947ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //             i0,i1,i2,i3,i4,i5,i6,i7,i8
2948ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //             +--------^--^--^
2949ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //
2950ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // MaxMemDepDistance let us stop alias-checking at i3 and we add
2951ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // dependencies from i0 to i3,i4,.. (even if they are not aliased).
2952ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // Previously we already added dependencies from i3 to i6,i7,i8
2953ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // (because of MaxMemDepDistance). As we added a dependency from
2954ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8
2955ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // and we can abort this loop at i6.
2956ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            if (DistToSrc >= 2 * MaxMemDepDistance)
2957ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                break;
2958ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            DistToSrc++;
295937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
296037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
296137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
296237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      BundleMember = BundleMember->NextInBundle;
296337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
296437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (InsertInReadyList && SD->isReady()) {
296537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ReadyInsts.push_back(SD);
296637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DEBUG(dbgs() << "SLP:     gets ready on update: " << *SD->Inst << "\n");
296737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
296837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
296937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
297037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
297137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::resetSchedule() {
297237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(ScheduleStart &&
297337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines         "tried to reset schedule on block which has not been scheduled");
297437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (Instruction *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
297537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *SD = getScheduleData(I);
297637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(isInSchedulingRegion(SD));
297737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    SD->IsScheduled = false;
297837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    SD->resetUnscheduledDeps();
297937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
298037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ReadyInsts.clear();
298137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
298237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
298337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::scheduleBlock(BlockScheduling *BS) {
298437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
298537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (!BS->ScheduleStart)
298637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return;
298737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
298837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");
298937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
299037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BS->resetSchedule();
299137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
299237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // For the real scheduling we use a more sophisticated ready-list: it is
299337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // sorted by the original instruction location. This lets the final schedule
299437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // be as  close as possible to the original instruction order.
299537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  struct ScheduleDataCompare {
299637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool operator()(ScheduleData *SD1, ScheduleData *SD2) {
299737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return SD2->SchedulingPriority < SD1->SchedulingPriority;
299837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
299937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  };
300037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;
300137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
300237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Ensure that all depencency data is updated and fill the ready-list with
300337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // initial instructions.
300437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int Idx = 0;
300537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int NumToSchedule = 0;
300637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
300737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines       I = I->getNextNode()) {
300837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *SD = BS->getScheduleData(I);
300937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(
301037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        SD->isPartOfBundle() == (ScalarToTreeEntry.count(SD->Inst) != 0) &&
301137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        "scheduler and vectorizer have different opinion on what is a bundle");
301237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    SD->FirstInBundle->SchedulingPriority = Idx++;
301337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (SD->isSchedulingEntity()) {
3014ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      BS->calculateDependencies(SD, false, this);
301537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      NumToSchedule++;
301637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
301737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
301837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BS->initialFillReadyList(ReadyInsts);
301937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
302037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  Instruction *LastScheduledInst = BS->ScheduleEnd;
302137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
302237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Do the "real" scheduling.
302337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  while (!ReadyInsts.empty()) {
302437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *picked = *ReadyInsts.begin();
302537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ReadyInsts.erase(ReadyInsts.begin());
302637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
302737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Move the scheduled instruction(s) to their dedicated places, if not
302837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // there yet.
302937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *BundleMember = picked;
303037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    while (BundleMember) {
303137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Instruction *pickedInst = BundleMember->Inst;
303237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (LastScheduledInst->getNextNode() != pickedInst) {
303337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BS->BB->getInstList().remove(pickedInst);
303437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BS->BB->getInstList().insert(LastScheduledInst, pickedInst);
303537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
303637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      LastScheduledInst = pickedInst;
303737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      BundleMember = BundleMember->NextInBundle;
303837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
303937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
304037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BS->schedule(picked, ReadyInsts);
304137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    NumToSchedule--;
304237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
304337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(NumToSchedule == 0 && "could not schedule all instructions");
304437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
304537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Avoid duplicate scheduling of the block.
304637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BS->ScheduleStart = nullptr;
304737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
304837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
30498383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem/// The SLPVectorizer Pass.
3050e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotemstruct SLPVectorizer : public FunctionPass {
305153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  typedef SmallVector<StoreInst *, 8> StoreList;
305253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  typedef MapVector<Value *, StoreList> StoreListMap;
30538383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
30548383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  /// Pass identification, replacement for typeid
30558383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  static char ID;
30568383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3057e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  explicit SLPVectorizer() : FunctionPass(ID) {
30588383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem    initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
30598383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  }
30608383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
30618383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  ScalarEvolution *SE;
30628383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  TargetTransformInfo *TTI;
3063dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  TargetLibraryInfo *TLI;
30648383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  AliasAnalysis *AA;
3065e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  LoopInfo *LI;
3066722b0a4d293b16eebaed94ae65d5f11743cbcea5Nadav Rotem  DominatorTree *DT;
3067ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  AssumptionCache *AC;
3068e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
306936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool runOnFunction(Function &F) override {
307036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (skipOptnoneFunction(F))
307136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return false;
307236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
3073e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    SE = &getAnalysis<ScalarEvolution>();
3074ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3075ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
3076ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    TLI = TLIP ? &TLIP->getTLI() : nullptr;
3077e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    AA = &getAnalysis<AliasAnalysis>();
3078ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
307936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3080ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3081e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3082e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    StoreRefs.clear();
3083e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    bool Changed = false;
3084e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3085d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton    // If the target claims to have no vector registers don't attempt
3086d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton    // vectorization.
3087d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton    if (!TTI->getNumberOfRegisters(true))
3088d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton      return false;
3089d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton
30903202f6cdb9193fe5365462118f499f6e164a1738Nadav Rotem    // Don't vectorize when the attribute NoImplicitFloat is used.
3091551dac1f62026ef32ad294d8c1cc5b545b05935aMatt Arsenault    if (F.hasFnAttribute(Attribute::NoImplicitFloat))
30923202f6cdb9193fe5365462118f499f6e164a1738Nadav Rotem      return false;
30933202f6cdb9193fe5365462118f499f6e164a1738Nadav Rotem
30940b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem    DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
309509ec4b21648700f9d4ef5bc90d732f90f32c930cNadav Rotem
309636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Use the bottom up slp vectorizer to construct chains that start with
3097dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // store instructions.
30984c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC);
3099ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
3100ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
3101ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // delete instructions.
310253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
310370d695801a2bd5eed4bd6ea05d40516e6e6fa276Nadav Rotem    // Scan the blocks in the function in post order.
31040c7f116bb6950ef819323d855415b2f2b0aad987Pirama Arumuga Nainar    for (auto BB : post_order(&F.getEntryBlock())) {
3105e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      // Vectorize trees that end at stores.
3106ef332b1ca1721be962c73e76b4c4e0e44ffaf5d9Nadav Rotem      if (unsigned count = collectStores(BB, R)) {
3107d7e8cce287616c1cc4dcbab6a43328b01fbe7be4Nadav Rotem        (void)count;
31080b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem        DEBUG(dbgs() << "SLP: Found " << count << " stores to vectorize.\n");
310953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem        Changed |= vectorizeStoreChains(R);
3110e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      }
31116611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem
31126611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem      // Vectorize trees that end at reductions.
31136611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem      Changed |= vectorizeChainsInBlock(BB, R);
3114e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    }
3115e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3116e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (Changed) {
31176959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem      R.optimizeGatherSequence();
31180b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem      DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
3119e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      DEBUG(verifyFunction(F));
3120e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    }
3121e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    return Changed;
3122e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  }
3123e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
312436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  void getAnalysisUsage(AnalysisUsage &AU) const override {
3125e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    FunctionPass::getAnalysisUsage(AU);
3126ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AU.addRequired<AssumptionCacheTracker>();
3127e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    AU.addRequired<ScalarEvolution>();
3128e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    AU.addRequired<AliasAnalysis>();
3129ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AU.addRequired<TargetTransformInfoWrapperPass>();
3130ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AU.addRequired<LoopInfoWrapperPass>();
313136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    AU.addRequired<DominatorTreeWrapperPass>();
3132ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AU.addPreserved<LoopInfoWrapperPass>();
313336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    AU.addPreserved<DominatorTreeWrapperPass>();
3134d4a9ebc7341a1ed066fcdff8e7e4e9cbf1bc4368Nadav Rotem    AU.setPreservesCFG();
3135e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  }
3136e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3137e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotemprivate:
31388383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
31398383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  /// \brief Collect memory references and sort them according to their base
31408383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  /// object. We sort the stores to their base objects to reduce the cost of the
31418383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  /// quadratic search on the stores. TODO: We can further reduce this cost
31428383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  /// if we flush the chain creation every time we run into a memory barrier.
3143369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
31448383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3145e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
3146369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
31478383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3148931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem  /// \brief Try to vectorize a list of operands.
3149dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// \@param BuildVector A list of users to ignore for the purpose of
3150dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  ///                     scheduling and that don't need extracting.
3151d69d9f20bc3acee0fc233853745c1de015b541f2Nadav Rotem  /// \returns true if a value was vectorized.
3152dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
315337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                          ArrayRef<Value *> BuildVector = None,
315437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                          bool allowReorder = false);
3155444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem
3156e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  /// \brief Try to vectorize a chain that may start at the operands of \V;
3157369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
31588383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3159e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  /// \brief Vectorize the stores that were collected in StoreRefs.
3160369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  bool vectorizeStoreChains(BoUpSLP &R);
3161d69d9f20bc3acee0fc233853745c1de015b541f2Nadav Rotem
31625cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem  /// \brief Scan the basic block and look for patterns that are likely to start
31635cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem  /// a vectorization chain.
3164369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R);
3165369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3166369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold,
3167369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem                           BoUpSLP &R);
3168e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3169369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold,
3170369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem                       BoUpSLP &R);
3171e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotemprivate:
3172e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  StoreListMap StoreRefs;
3173e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem};
3174e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
317536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// \brief Check that the Values in the slice in VL array are still existent in
3176dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// the WeakVH array.
3177dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// Vectorization of part of the VL array may cause later values in the VL array
3178dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// to become invalid. We track when this has happened in the WeakVH array.
31794c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainarstatic bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
31804c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                               unsigned SliceBegin, unsigned SliceSize) {
31814c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  VL = VL.slice(SliceBegin, SliceSize);
31824c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  VH = VH.slice(SliceBegin, SliceSize);
31834c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  return !std::equal(VL.begin(), VL.end(), VH.begin());
3184dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling}
3185dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
3186369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
3187369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem                                          int CostThreshold, BoUpSLP &R) {
3188369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  unsigned ChainLen = Chain.size();
3189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
3190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        << "\n");
3191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
31924c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  auto &DL = cast<StoreInst>(Chain[0])->getModule()->getDataLayout();
31934c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  unsigned Sz = DL.getTypeSizeInBits(StoreTy);
3194369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  unsigned VF = MinVecRegSize / Sz;
3195369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3196369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (!isPowerOf2_32(Sz) || VF < 2)
3197369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return false;
3198369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
319936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Keep track of values that were deleted by vectorizing in the loop below.
3200dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling  SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
3201dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
3202369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  bool Changed = false;
3203369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Look for profitable vectorizable trees at all offsets, starting at zero.
3204369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 0, e = ChainLen; i < e; ++i) {
3205369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (i + VF > e)
3206369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      break;
3207dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
3208dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling    // Check that a previous iteration of this loop did not delete the Value.
3209dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling    if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
3210dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling      continue;
3211dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
3212369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
3213369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          << "\n");
3214369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ArrayRef<Value *> Operands = Chain.slice(i, VF);
3215369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3216369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    R.buildTree(Operands);
3217369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3218369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    int Cost = R.getTreeCost();
3219369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3220369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
3221369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (Cost < CostThreshold) {
3222369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
3223369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      R.vectorizeTree();
3224369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3225369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Move to the next bundle.
3226369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      i += VF - 1;
3227369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Changed = true;
3228369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
3229369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
3230369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
32318e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling  return Changed;
3232369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
3233369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3234369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
323521508bf853354343266dbe6d830ff30bed006a68Nadav Rotem                                    int costThreshold, BoUpSLP &R) {
32364c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  SetVector<StoreInst *> Heads, Tails;
32374c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
3238369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3239369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // We may run into multiple chains that merge into a single chain. We mark the
3240369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // stores that we vectorized so that we don't visit the same store twice.
3241369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  BoUpSLP::ValueSet VectorizedStores;
3242369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  bool Changed = false;
3243369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3244369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Do a quadratic search on all of the given stores and find
32456611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem  // all of the pairs of stores that follow each other.
324621508bf853354343266dbe6d830ff30bed006a68Nadav Rotem  for (unsigned i = 0, e = Stores.size(); i < e; ++i) {
3247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    for (unsigned j = 0; j < e; ++j) {
324868ccbf648ea544faad29115cdda929920739e154Nadav Rotem      if (i == j)
3249369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        continue;
32504c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
32514c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) {
3252369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Tails.insert(Stores[j]);
3253369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Heads.insert(Stores[i]);
3254369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ConsecutiveChain[Stores[i]] = Stores[j];
3255369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
3256369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
325721508bf853354343266dbe6d830ff30bed006a68Nadav Rotem  }
3258369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3259369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // For stores that start but don't end a link in the chain:
32604c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
3261369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem       it != e; ++it) {
3262369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (Tails.count(*it))
3263369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      continue;
3264369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3265369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    // We found a store instr that starts a chain. Now follow the chain and try
3266369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    // to vectorize it.
3267369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    BoUpSLP::ValueList Operands;
32684c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    StoreInst *I = *it;
3269369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    // Collect the chain into a list.
3270369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    while (Tails.count(I) || Heads.count(I)) {
3271369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (VectorizedStores.count(I))
3272369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        break;
3273369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Operands.push_back(I);
3274369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Move to the next value in the chain.
3275369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      I = ConsecutiveChain[I];
3276369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
3277369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3278369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    bool Vectorized = vectorizeStoreChain(Operands, costThreshold, R);
3279369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3280369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    // Mark the vectorized stores so that we don't vectorize them again.
3281369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (Vectorized)
3282369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      VectorizedStores.insert(Operands.begin(), Operands.end());
3283369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Changed |= Vectorized;
3284369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
3285369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3286369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return Changed;
3287369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
3288369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3289369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3290369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemunsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
3291ef332b1ca1721be962c73e76b4c4e0e44ffaf5d9Nadav Rotem  unsigned count = 0;
3292e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  StoreRefs.clear();
32934c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  const DataLayout &DL = BB->getModule()->getDataLayout();
3294e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
3295e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    StoreInst *SI = dyn_cast<StoreInst>(it);
3296e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (!SI)
3297e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      continue;
3298e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3299fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer    // Don't touch volatile stores.
3300fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer    if (!SI->isSimple())
3301fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer      continue;
3302fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer
3303e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    // Check that the pointer points to scalars.
33044f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem    Type *Ty = SI->getValueOperand()->getType();
3305ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (!isValidElementType(Ty))
3306dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      continue;
3307e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
33086623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer    // Find the base pointer.
33096623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer    Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL);
3310e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3311e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    // Save the store locations.
3312e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    StoreRefs[Ptr].push_back(SI);
3313ef332b1ca1721be962c73e76b4c4e0e44ffaf5d9Nadav Rotem    count++;
33148383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  }
3315ef332b1ca1721be962c73e76b4c4e0e44ffaf5d9Nadav Rotem  return count;
3316e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem}
3317e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3318369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
33190b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem  if (!A || !B)
33200b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem    return false;
33216fe5cc49d88c9dd48a1eefe4c1bdba1567b8eef2Benjamin Kramer  Value *VL[] = { A, B };
332237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return tryToVectorizeList(VL, R, None, true);
3323444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem}
3324444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem
3325dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
332637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                       ArrayRef<Value *> BuildVector,
332737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                       bool allowReorder) {
33285cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem  if (VL.size() < 2)
33295cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem    return false;
33305cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem
33310b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem  DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");
33324f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem
33335cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem  // Check that all of the parts are scalar instructions of the same type.
33345cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem  Instruction *I0 = dyn_cast<Instruction>(VL[0]);
33350b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem  if (!I0)
333689008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    return false;
33375cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem
33385cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem  unsigned Opcode0 = I0->getOpcode();
33394c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  const DataLayout &DL = I0->getModule()->getDataLayout();
33408e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
334189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang  Type *Ty0 = I0->getType();
33424c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  unsigned Sz = DL.getTypeSizeInBits(Ty0);
334389008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang  unsigned VF = MinVecRegSize / Sz;
33445cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem
33454f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem  for (int i = 0, e = VL.size(); i < e; ++i) {
33464f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem    Type *Ty = VL[i]->getType();
3347ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (!isValidElementType(Ty))
334889008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      return false;
33495cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem    Instruction *Inst = dyn_cast<Instruction>(VL[i]);
33505cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem    if (!Inst || Inst->getOpcode() != Opcode0)
335189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      return false;
33524f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem  }
33534f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem
335489008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang  bool Changed = false;
33558e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
3356dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Keep track of values that were deleted by vectorizing in the loop below.
3357dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling  SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
3358dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
335989008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
336089008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    unsigned OpsWidth = 0;
33618e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
33628e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling    if (i + VF > e)
336389008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      OpsWidth = e - i;
336489008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    else
336589008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      OpsWidth = VF;
336689008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang
336789008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
336889008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      break;
3369d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem
3370dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling    // Check that a previous iteration of this loop did not delete the Value.
3371dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling    if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
3372dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling      continue;
3373dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
33748e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling    DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
33758e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling                 << "\n");
337689008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
33778e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
3378dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    ArrayRef<Value *> BuildVectorSlice;
3379dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (!BuildVector.empty())
3380dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      BuildVectorSlice = BuildVector.slice(i, OpsWidth);
3381dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
3382dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    R.buildTree(Ops, BuildVectorSlice);
338337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // TODO: check if we can allow reordering also for other cases than
338437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // tryToVectorizePair()
338537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (allowReorder && R.shouldReorder()) {
338637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      assert(Ops.size() == 2);
338737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      assert(BuildVectorSlice.empty());
338837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Value *ReorderedOps[] = { Ops[1], Ops[0] };
338937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      R.buildTree(ReorderedOps, None);
339037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
339189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    int Cost = R.getTreeCost();
33928e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
339389008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    if (Cost < -SLPCostThreshold) {
339436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
3395dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Value *VectorizedRoot = R.vectorizeTree();
3396dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
3397dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // Reconstruct the build vector by extracting the vectorized root. This
3398dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // way we handle the case where some elements of the vector are undefined.
3399dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      //  (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
3400dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (!BuildVectorSlice.empty()) {
3401dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // The insert point is the last build vector instruction. The vectorized
3402dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // root will precede it. This guarantees that we get an instruction. The
3403dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // vectorized tree could have been constant folded.
3404dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
3405dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        unsigned VecIdx = 0;
3406dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        for (auto &V : BuildVectorSlice) {
3407dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          IRBuilder<true, NoFolder> Builder(
3408dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines              ++BasicBlock::iterator(InsertAfter));
3409dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          InsertElementInst *IE = cast<InsertElementInst>(V);
3410dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
3411dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines              VectorizedRoot, Builder.getInt32(VecIdx++)));
3412dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          IE->setOperand(1, Extract);
3413dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          IE->removeFromParent();
3414dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          IE->insertAfter(Extract);
3415dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          InsertAfter = IE;
3416dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        }
3417dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      }
341889008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      // Move to the next bundle.
341989008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      i += VF - 1;
342089008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      Changed = true;
342189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    }
342289008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang  }
34238e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
34248e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling  return Changed;
3425e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem}
34268383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3427369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
34280b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem  if (!V)
34290b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem    return false;
343053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
3431e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  // Try to vectorize V.
3432e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
3433f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem    return true;
3434f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem
3435e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
3436e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
3437e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  // Try to skip B.
3438e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  if (B && B->hasOneUse()) {
3439e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
3440e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
3441e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (tryToVectorizePair(A, B0, R)) {
3442ab105ae95fc473c19d9f0b019fc7c7a16d17b1a5Nadav Rotem      return true;
3443f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem    }
3444e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (tryToVectorizePair(A, B1, R)) {
3445e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      return true;
3446f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem    }
3447f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem  }
3448f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem
34497fac0ef71cfaeafd91b9520b553d00d91f83a442Nadav Rotem  // Try to skip A.
3450e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  if (A && A->hasOneUse()) {
3451e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
3452e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
3453e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (tryToVectorizePair(A0, B, R)) {
3454e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      return true;
3455e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    }
3456e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (tryToVectorizePair(A1, B, R)) {
3457e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      return true;
3458f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem    }
3459f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem  }
3460e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  return 0;
3461e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem}
3462f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem
3463a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \brief Generate a shuffle mask to be used in a reduction tree.
3464a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///
3465a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param VecLen The length of the vector to be reduced.
3466a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param NumEltsToRdx The number of elements that should be reduced in the
3467a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///        vector.
3468a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param IsPairwise Whether the reduction is a pairwise or splitting
3469a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///        reduction. A pairwise reduction will generate a mask of
3470a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///        <0,2,...> or <1,3,..> while a splitting reduction will generate
3471a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///        <2,3, undef,undef> for a vector of 4 and NumElts = 2.
3472a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param IsLeft True will generate a mask of even elements, odd otherwise.
3473a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferstatic Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
3474a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                                   bool IsPairwise, bool IsLeft,
3475a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                                   IRBuilder<> &Builder) {
3476a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask");
3477a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3478a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  SmallVector<Constant *, 32> ShuffleMask(
3479a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      VecLen, UndefValue::get(Builder.getInt32Ty()));
3480a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3481a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  if (IsPairwise)
3482a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // Build a mask of 0, 2, ... (left) or 1, 3, ... (right).
3483a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    for (unsigned i = 0; i != NumEltsToRdx; ++i)
3484a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft);
3485a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  else
3486a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // Move the upper half of the vector to the lower half.
3487a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    for (unsigned i = 0; i != NumEltsToRdx; ++i)
3488a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i);
3489a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3490a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  return ConstantVector::get(ShuffleMask);
3491a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer}
3492a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3493a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3494a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// Model horizontal reductions.
3495a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///
3496a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// A horizontal reduction is a tree of reduction operations (currently add and
3497a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// fadd) that has operations that can be put into a vector as its leaf.
3498a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// For example, this tree:
3499a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///
3500a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// mul mul mul mul
3501a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///  \  /    \  /
3502a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///   +       +
3503a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///    \     /
3504a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///       +
3505a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// This tree has "mul" as its reduced values and "+" as its reduction
3506a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// operations. A reduction might be feeding into a store or a binary operation
3507a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// feeding a phi.
3508a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///    ...
3509a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///    \  /
3510a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///     +
351138bf2d62b6be4b496e8f6d176578699b9c6e08bbArnold Schwaighofer///     |
3512a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///  phi +=
3513a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///
3514a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///  Or:
3515a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///    ...
3516a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///    \  /
3517a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///     +
351838bf2d62b6be4b496e8f6d176578699b9c6e08bbArnold Schwaighofer///     |
3519a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///   *p =
3520a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///
3521a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferclass HorizontalReduction {
3522dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  SmallVector<Value *, 16> ReductionOps;
3523a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  SmallVector<Value *, 32> ReducedVals;
3524a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3525a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  BinaryOperator *ReductionRoot;
3526a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  PHINode *ReductionPHI;
3527a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3528a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// The opcode of the reduction.
3529a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  unsigned ReductionOpcode;
3530a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// The opcode of the values we perform a reduction on.
3531a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  unsigned ReducedValueOpcode;
3532a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// The width of one full horizontal reduction operation.
3533a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  unsigned ReduxWidth;
3534a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// Should we model this reduction as a pairwise reduction tree or a tree that
3535a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// splits the vector in halves and adds those halves.
3536a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  bool IsPairwiseReduction;
3537a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3538a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferpublic:
3539a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  HorizontalReduction()
3540dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
3541a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
3542a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3543a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// \brief Try to find a reduction tree.
35444c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
3545a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    assert((!Phi ||
3546a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer            std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
3547a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer           "Thi phi needs to use the binary operator");
3548a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3549a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // We could have a initial reductions that is not an add.
3550a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    //  r *= v1 + v2 + v3 + v4
3551a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // In such a case start looking for a tree rooted in the first '+'.
3552a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (Phi) {
3553a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (B->getOperand(0) == Phi) {
3554dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        Phi = nullptr;
3555a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        B = dyn_cast<BinaryOperator>(B->getOperand(1));
3556a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      } else if (B->getOperand(1) == Phi) {
3557dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        Phi = nullptr;
3558a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        B = dyn_cast<BinaryOperator>(B->getOperand(0));
3559a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      }
3560a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    }
3561a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3562a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (!B)
3563a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
3564a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3565a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    Type *Ty = B->getType();
3566ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (!isValidElementType(Ty))
3567a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
3568a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
35694c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    const DataLayout &DL = B->getModule()->getDataLayout();
3570a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    ReductionOpcode = B->getOpcode();
3571a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    ReducedValueOpcode = 0;
35724c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
3573a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    ReductionRoot = B;
3574a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    ReductionPHI = Phi;
3575a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3576a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (ReduxWidth < 4)
3577a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
3578a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3579a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // We currently only support adds.
3580a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (ReductionOpcode != Instruction::Add &&
3581a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        ReductionOpcode != Instruction::FAdd)
3582a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
3583a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3584a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // Post order traverse the reduction tree starting at B. We only handle true
3585a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // trees containing only binary operators.
3586a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    SmallVector<std::pair<BinaryOperator *, unsigned>, 32> Stack;
3587a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    Stack.push_back(std::make_pair(B, 0));
3588a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    while (!Stack.empty()) {
3589a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      BinaryOperator *TreeN = Stack.back().first;
3590a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      unsigned EdgeToVist = Stack.back().second++;
3591a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;
3592a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3593a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Only handle trees in the current basic block.
3594a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (TreeN->getParent() != B->getParent())
3595a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        return false;
3596a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3597a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Each tree node needs to have one user except for the ultimate
3598a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // reduction.
3599a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (!TreeN->hasOneUse() && TreeN != B)
3600a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        return false;
3601a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3602a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Postorder vist.
3603a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (EdgeToVist == 2 || IsReducedValue) {
3604a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        if (IsReducedValue) {
3605a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          // Make sure that the opcodes of the operations that we are going to
3606a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          // reduce match.
3607a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          if (!ReducedValueOpcode)
3608a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer            ReducedValueOpcode = TreeN->getOpcode();
3609a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          else if (ReducedValueOpcode != TreeN->getOpcode())
3610a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer            return false;
3611a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          ReducedVals.push_back(TreeN);
3612a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        } else {
3613a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          // We need to be able to reassociate the adds.
3614a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          if (!TreeN->isAssociative())
3615a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer            return false;
3616dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          ReductionOps.push_back(TreeN);
3617a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        }
3618a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        // Retract.
3619a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Stack.pop_back();
3620a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        continue;
3621a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      }
3622a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3623a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Visit left or right.
3624a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      Value *NextV = TreeN->getOperand(EdgeToVist);
3625a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      BinaryOperator *Next = dyn_cast<BinaryOperator>(NextV);
3626a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (Next)
3627a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Stack.push_back(std::make_pair(Next, 0));
3628a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      else if (NextV != Phi)
3629a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        return false;
3630a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    }
3631a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    return true;
3632a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  }
3633a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3634a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// \brief Attempt to vectorize the tree found by
3635a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// matchAssociativeReduction.
3636a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
3637a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (ReducedVals.empty())
3638a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
3639a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3640a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    unsigned NumReducedVals = ReducedVals.size();
3641a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (NumReducedVals < ReduxWidth)
3642a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
3643a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3644dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    Value *VectorizedTree = nullptr;
3645a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    IRBuilder<> Builder(ReductionRoot);
3646a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    FastMathFlags Unsafe;
3647a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    Unsafe.setUnsafeAlgebra();
3648a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    Builder.SetFastMathFlags(Unsafe);
3649a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    unsigned i = 0;
3650a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3651a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
365237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      V.buildTree(makeArrayRef(&ReducedVals[i], ReduxWidth), ReductionOps);
3653a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3654a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Estimate cost.
3655a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
3656a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (Cost >= -SLPCostThreshold)
3657a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        break;
3658a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3659a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost
3660a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                   << ". (HorRdx)\n");
3661a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3662a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Vectorize a tree.
3663a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
3664a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      Value *VectorizedRoot = V.vectorizeTree();
3665a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3666a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Emit a reduction.
3667a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder);
3668a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (VectorizedTree) {
3669a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Builder.SetCurrentDebugLocation(Loc);
3670a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
3671a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                                     ReducedSubTree, "bin.rdx");
3672a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      } else
3673a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        VectorizedTree = ReducedSubTree;
3674a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    }
3675a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3676a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (VectorizedTree) {
3677a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Finish the reduction.
3678a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      for (; i < NumReducedVals; ++i) {
3679a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Builder.SetCurrentDebugLocation(
3680a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          cast<Instruction>(ReducedVals[i])->getDebugLoc());
3681a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
3682a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                                     ReducedVals[i]);
3683a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      }
3684a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Update users.
3685a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (ReductionPHI) {
3686dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        assert(ReductionRoot && "Need a reduction operation");
3687a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        ReductionRoot->setOperand(0, VectorizedTree);
3688a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        ReductionRoot->setOperand(1, ReductionPHI);
3689a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      } else
3690a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        ReductionRoot->replaceAllUsesWith(VectorizedTree);
3691a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    }
3692dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return VectorizedTree != nullptr;
3693a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  }
3694a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3695a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferprivate:
3696a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3697a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// \brief Calcuate the cost of a reduction.
3698a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) {
3699a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    Type *ScalarTy = FirstReducedVal->getType();
3700a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
3701a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3702a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    int PairwiseRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, true);
3703a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    int SplittingRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, false);
3704a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3705a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost;
3706a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
3707a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3708a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    int ScalarReduxCost =
3709a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy);
3710a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3711a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
3712a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                 << " for reduction that starts with " << *FirstReducedVal
3713a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                 << " (It is a "
3714a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                 << (IsPairwiseReduction ? "pairwise" : "splitting")
3715a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                 << " reduction)\n");
3716a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3717a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    return VecReduxCost - ScalarReduxCost;
3718a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  }
3719a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3720a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L,
3721a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                            Value *R, const Twine &Name = "") {
3722a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (Opcode == Instruction::FAdd)
3723a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return Builder.CreateFAdd(L, R, Name);
3724a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name);
3725a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  }
3726a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3727a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// \brief Emit a horizontal reduction of the vectorized value.
3728a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) {
3729a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    assert(VectorizedValue && "Need to have a vectorized tree node");
3730a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    assert(isPowerOf2_32(ReduxWidth) &&
3731a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer           "We only handle power-of-two reductions for now");
3732a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3733ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Value *TmpVec = VectorizedValue;
3734a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
3735a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (IsPairwiseReduction) {
3736a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *LeftMask =
3737a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          createRdxShuffleMask(ReduxWidth, i, true, true, Builder);
3738a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *RightMask =
3739a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          createRdxShuffleMask(ReduxWidth, i, true, false, Builder);
3740a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3741a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *LeftShuf = Builder.CreateShuffleVector(
3742a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l");
3743a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *RightShuf = Builder.CreateShuffleVector(
3744a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          TmpVec, UndefValue::get(TmpVec->getType()), (RightMask),
3745a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          "rdx.shuf.r");
3746a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf,
3747a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                             "bin.rdx");
3748a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      } else {
3749a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *UpperHalf =
3750a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          createRdxShuffleMask(ReduxWidth, i, false, false, Builder);
3751a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *Shuf = Builder.CreateShuffleVector(
3752a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf");
3753a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx");
3754a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      }
3755a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    }
3756a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3757a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // The result is in the first element of the vector.
3758a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
3759a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  }
3760a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer};
3761a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
37621b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// \brief Recognize construction of vectors like
37631b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///  %ra = insertelement <4 x float> undef, float %s0, i32 0
37641b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///  %rb = insertelement <4 x float> %ra, float %s1, i32 1
37651b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///  %rc = insertelement <4 x float> %rb, float %s2, i32 2
37661b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///  %rd = insertelement <4 x float> %rc, float %s3, i32 3
37671b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///
37681b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// Returns true if it matches
37691b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///
3770dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesstatic bool findBuildVector(InsertElementInst *FirstInsertElem,
3771dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                            SmallVectorImpl<Value *> &BuildVector,
3772dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                            SmallVectorImpl<Value *> &BuildVectorOpds) {
3773dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!isa<UndefValue>(FirstInsertElem->getOperand(0)))
37741b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    return false;
37751b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
3776dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  InsertElementInst *IE = FirstInsertElem;
37771b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  while (true) {
3778dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    BuildVector.push_back(IE);
3779dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    BuildVectorOpds.push_back(IE->getOperand(1));
37801b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
37811b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    if (IE->use_empty())
37821b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault      return false;
37831b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
378436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->user_back());
37851b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    if (!NextUse)
37861b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault      return true;
37871b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
37881b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    // If this isn't the final use, make sure the next insertelement is the only
37891b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    // use. It's OK if the final constructed vector is used multiple times
37901b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    if (!IE->hasOneUse())
37911b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault      return false;
37921b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
37931b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    IE = NextUse;
37941b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  }
37951b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
37961b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  return false;
37971b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault}
37981b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
379924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighoferstatic bool PhiTypeSorterFunc(Value *V, Value *V2) {
380024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer  return V->getType() < V2->getType();
380124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer}
380224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer
3803369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
3804e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  bool Changed = false;
3805931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem  SmallVector<Value *, 4> Incoming;
380624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer  SmallSet<Value *, 16> VisitedInstrs;
380724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer
380824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer  bool HaveVectorizedPhiNodes = true;
380924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer  while (HaveVectorizedPhiNodes) {
381024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    HaveVectorizedPhiNodes = false;
381124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer
381224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    // Collect the incoming values from the PHIs.
381324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    Incoming.clear();
381424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie;
381524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer         ++instr) {
381624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      PHINode *P = dyn_cast<PHINode>(instr);
381724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      if (!P)
381824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        break;
381916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
382024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      if (!VisitedInstrs.count(P))
382124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        Incoming.push_back(P);
382224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    }
3823931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem
382424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    // Sort by type.
382524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc);
3826931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem
382724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    // Try to vectorize elements base on their type.
382824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
382924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer                                           E = Incoming.end();
383024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer         IncIt != E;) {
383124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer
383224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      // Look for the next elements with the same type.
383324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
383424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      while (SameTypeIt != E &&
383524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer             (*SameTypeIt)->getType() == (*IncIt)->getType()) {
383624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        VisitedInstrs.insert(*SameTypeIt);
383724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        ++SameTypeIt;
383824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      }
383916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
384024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      // Try to vectorize them.
384124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      unsigned NumElts = (SameTypeIt - IncIt);
384224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
384337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) {
384424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        // Success start over because instructions might have been changed.
384524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        HaveVectorizedPhiNodes = true;
384616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        Changed = true;
384724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        break;
384816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      }
384916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
385036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Start over at the next instruction of a different type (or the end).
385124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      IncIt = SameTypeIt;
3852931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem    }
3853931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem  }
3854931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem
385516a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer  VisitedInstrs.clear();
385616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
385716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
385816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer    // We may go through BB multiple times so skip the one we have checked.
385937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (!VisitedInstrs.insert(it).second)
386016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      continue;
386116a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
386216a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer    if (isa<DbgInfoIntrinsic>(it))
38630b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem      continue;
3864e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3865e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    // Try to vectorize reductions that use PHINodes.
386616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer    if (PHINode *P = dyn_cast<PHINode>(it)) {
3867e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      // Check that the PHI is a reduction PHI.
38680b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem      if (P->getNumIncomingValues() != 2)
38690b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem        return Changed;
38700b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem      Value *Rdx =
38710b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem          (P->getIncomingBlock(0) == BB
38720b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem               ? (P->getIncomingValue(0))
3873dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines               : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1)
3874dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                               : nullptr));
3875e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      // Check if this is a Binary Operator.
3876e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
3877e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      if (!BI)
38788383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem        continue;
3879196ee11f85ce0148d2c2e33fbe1f1171ac5a8828Nadav Rotem
3880a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Try to match and vectorize a horizontal reduction.
3881a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      HorizontalReduction HorRdx;
38824c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) &&
3883a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          HorRdx.tryToReduce(R, TTI)) {
3884a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Changed = true;
3885a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        it = BB->begin();
3886a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        e = BB->end();
3887a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        continue;
3888a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      }
3889a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3890a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer     Value *Inst = BI->getOperand(0);
38910b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem      if (Inst == P)
38920b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem        Inst = BI->getOperand(1);
389353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
389416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
389516a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        // We would like to start over since some instructions are deleted
389616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        // and the iterator may become invalid value.
389716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        Changed = true;
389816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        it = BB->begin();
389916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        e = BB->end();
3900a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        continue;
390116a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      }
3902a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3903e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      continue;
3904e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    }
3905196ee11f85ce0148d2c2e33fbe1f1171ac5a8828Nadav Rotem
3906a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // Try to vectorize horizontal reductions feeding into a store.
39079660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer    if (ShouldStartVectorizeHorAtStore)
39089660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer      if (StoreInst *SI = dyn_cast<StoreInst>(it))
39099660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer        if (BinaryOperator *BinOp =
39109660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer                dyn_cast<BinaryOperator>(SI->getValueOperand())) {
39119660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer          HorizontalReduction HorRdx;
39124c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar          if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) &&
39139660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer                HorRdx.tryToReduce(R, TTI)) ||
39149660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer               tryToVectorize(BinOp, R))) {
39159660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer            Changed = true;
39169660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer            it = BB->begin();
39179660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer            e = BB->end();
39189660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer            continue;
39199660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer          }
3920a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        }
3921a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
392237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Try to vectorize horizontal reductions feeding into a return.
392337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
392437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (RI->getNumOperands() != 0)
392537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (BinaryOperator *BinOp =
392637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                dyn_cast<BinaryOperator>(RI->getOperand(0))) {
392737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
392837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (tryToVectorizePair(BinOp->getOperand(0),
392937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                 BinOp->getOperand(1), R)) {
393037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            Changed = true;
393137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            it = BB->begin();
393237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            e = BB->end();
393337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            continue;
393437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
393537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
393637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
3937e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    // Try to vectorize trees that start at compare instructions.
393816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer    if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
3939e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
394016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        Changed = true;
394116a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        // We would like to start over since some instructions are deleted
394216a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        // and the iterator may become invalid value.
394316a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        it = BB->begin();
394416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        e = BB->end();
3945e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem        continue;
3946e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      }
394716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
394816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      for (int i = 0; i < 2; ++i) {
394937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
395037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
395137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            Changed = true;
395237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // We would like to start over since some instructions are deleted
395337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // and the iterator may become invalid value.
395437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            it = BB->begin();
395537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            e = BB->end();
3956ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            break;
395737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
395837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
395916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      }
3960e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      continue;
39618383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem    }
39621b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
39631b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    // Try to vectorize trees that start at insertelement instructions.
3964dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
3965dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      SmallVector<Value *, 16> BuildVector;
3966dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      SmallVector<Value *, 16> BuildVectorOpds;
3967dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
39681b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault        continue;
39691b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
3970dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // Vectorize starting with the build vector operands ignoring the
3971dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // BuildVector instructions for the purpose of scheduling and user
3972dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // extraction.
3973dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {
39741b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault        Changed = true;
39751b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault        it = BB->begin();
39761b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault        e = BB->end();
39771b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault      }
39781b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
39791b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault      continue;
39801b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    }
39818383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  }
39828383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3983e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  return Changed;
3984e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem}
39858383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3986369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
3987e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  bool Changed = false;
3988e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  // Attempt to sort and vectorize each of the store-groups.
3989e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
3990e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem       it != e; ++it) {
3991e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (it->second.size() < 2)
3992e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      continue;
3993f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem
39940b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem    DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
399521508bf853354343266dbe6d830ff30bed006a68Nadav Rotem          << it->second.size() << ".\n");
39968383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
399721508bf853354343266dbe6d830ff30bed006a68Nadav Rotem    // Process the stores in chunks of 16.
399821508bf853354343266dbe6d830ff30bed006a68Nadav Rotem    for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) {
399921508bf853354343266dbe6d830ff30bed006a68Nadav Rotem      unsigned Len = std::min<unsigned>(CE - CI, 16);
400037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len),
400137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                 -SLPCostThreshold, R);
400221508bf853354343266dbe6d830ff30bed006a68Nadav Rotem    }
40038383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  }
4004e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  return Changed;
4005e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem}
40068383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
40078383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem} // end anonymous namespace
40088383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
// Out-of-line definition of the pass's static ID member, followed by the
// pass registration block.
40098383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemchar SLPVectorizer::ID = 0;
// Human-readable pass name handed to both INITIALIZE_PASS_BEGIN and
// INITIALIZE_PASS_END below.
40108383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemstatic const char lv_name[] = "SLP Vectorizer";
// Registration is bracketed by BEGIN/END; the dependency lines in between
// name AliasAnalysis (as an analysis group), TargetTransformInfoWrapperPass,
// AssumptionCacheTracker, ScalarEvolution and LoopSimplify.
// NOTE(review): the two trailing `false, false` arguments are presumably the
// CFG-only / is-analysis flags -- confirm against the macro definition.
40118383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
40128383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_AG_DEPENDENCY(AliasAnalysis)
4013ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesINITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
4014ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesINITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
40158383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
40168383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_DEPENDENCY(LoopSimplify)
40178383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
40188383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
40198383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemnamespace llvm {
40200b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav RotemPass *createSLPVectorizerPass() { return new SLPVectorizer(); }
40218383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem}
4022