18383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
28383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//
38383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//                     The LLVM Compiler Infrastructure
48383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//
58383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// This file is distributed under the University of Illinois Open Source
68383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// License. See LICENSE.TXT for details.
78383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//
88383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===----------------------------------------------------------------------===//
98383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
108383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// stores that can be put together into vector-stores. Next, it attempts to
118383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// construct vectorizable tree using the use-def chains. If a profitable tree
128383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// was found, the SLP vectorizer performs vectorization on the tree.
138383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//
148383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// The pass is inspired by the work described in the paper:
158383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//  "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
168383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//
178383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===----------------------------------------------------------------------===//
18de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
194c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar#include "llvm/ADT/Optional.h"
206959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem#include "llvm/ADT/PostOrderIterator.h"
2153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include "llvm/ADT/SetVector.h"
2237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/ADT/Statistic.h"
2337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Analysis/CodeMetrics.h"
24de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/Analysis/GlobalsModRef.h"
25de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/Analysis/LoopAccessAnalysis.h"
2653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include "llvm/Analysis/ScalarEvolutionExpressions.h"
276623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer#include "llvm/Analysis/ValueTracking.h"
28de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/Analysis/VectorUtils.h"
298383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/DataLayout.h"
3036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Dominators.h"
3136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/IRBuilder.h"
328383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Instructions.h"
33f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem#include "llvm/IR/IntrinsicInst.h"
348383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Module.h"
35dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/NoFolder.h"
368383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Type.h"
378383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Value.h"
3836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Verifier.h"
398383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Pass.h"
408383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/CommandLine.h"
418383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/Debug.h"
428383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/raw_ostream.h"
43de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/Transforms/Vectorize.h"
4453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include <algorithm>
4537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include <memory>
468383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
478383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemusing namespace llvm;
48de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarusing namespace slpvectorizer;
498383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
50dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define SV_NAME "slp-vectorizer"
51dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "SLP"
52dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
5337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen HinesSTATISTIC(NumVectorInstructions, "Number of vector instructions generated");
5437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
558383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemstatic cl::opt<int>
560b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem    SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
5708e20fbea18922e9f5f1cfb0ea7931c90006100eNadav Rotem                     cl::desc("Only vectorize if you gain more than this "
5808e20fbea18922e9f5f1cfb0ea7931c90006100eNadav Rotem                              "number "));
59a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
60a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferstatic cl::opt<bool>
61f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
62a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                   cl::desc("Attempt to vectorize horizontal reductions"));
63a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
649660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighoferstatic cl::opt<bool> ShouldStartVectorizeHorAtStore(
659660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer    "slp-vectorize-hor-store", cl::init(false), cl::Hidden,
669660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer    cl::desc(
679660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer        "Attempt to vectorize horizontal reductions feeding into a store"));
689660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer
69f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic cl::opt<int>
70f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarMaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
71f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    cl::desc("Attempt to vectorize for this register size in bits"));
72f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
73f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Limits the size of scheduling regions in a block.
74f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// It avoid long compile times for _very_ large blocks where vector
75f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// instructions are spread over a wide range.
76f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// This limit is way higher than needed by real-world functions.
77f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic cl::opt<int>
78f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
79f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    cl::desc("Limit the size of the SLP scheduling region per block"));
80f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
81de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic cl::opt<int> MinVectorRegSizeOption(
82de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    "slp-min-reg-size", cl::init(128), cl::Hidden,
83de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    cl::desc("Attempt to vectorize for this register size in bits"));
848383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
/// Recursion depth limit.
// FIXME: Set this via cl::opt to allow overriding.
static const unsigned RecursionMaxDepth = 12;

/// Cap on the number of alias checks. The value was picked so that it has no
/// negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;

/// Second cap for the alias checks: the maximum distance between load/store
/// instructions for which alias checks are still performed. Useful for very
/// large basic blocks.
static const unsigned MaxMemDepDistance = 160;

/// Even when the ScheduleRegionSizeBudget is exhausted, scheduling regions up
/// to this size are still allowed to be handled.
static const int MinScheduleRegionSize = 16;
100f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
101ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \brief Predicate for the element types that the SLP vectorizer supports.
102ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines///
103ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// The most important thing to filter here are types which are invalid in LLVM
104ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// vectors. We also filter target specific types which have absolutely no
105ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// meaningful vectorization path such as x86_fp80 and ppc_f128. This just
106ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// avoids spending time checking the cost model and realizing that they will
107ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// be inevitably scalarized.
108ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic bool isValidElementType(Type *Ty) {
109ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
110ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines         !Ty->isPPC_FP128Ty();
111ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
112ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
113369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns the parent basic block if all of the instructions in \p VL
114369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// are in the same block or null otherwise.
115369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
116369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Instruction *I0 = dyn_cast<Instruction>(VL[0]);
117369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (!I0)
118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return nullptr;
119369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  BasicBlock *BB = I0->getParent();
120369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (int i = 1, e = VL.size(); i < e; i++) {
121369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Instruction *I = dyn_cast<Instruction>(VL[i]);
122369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (!I)
123dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return nullptr;
124369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
125369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (BB != I->getParent())
126dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return nullptr;
127369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
128369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return BB;
129369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
130369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
131369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns True if all of the values in \p VL are constants.
132369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic bool allConstant(ArrayRef<Value *> VL) {
133de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (Value *i : VL)
134de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!isa<Constant>(i))
135369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return false;
136369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return true;
137369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
138369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
139369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns True if all of the values in \p VL are identical.
140369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic bool isSplat(ArrayRef<Value *> VL) {
141369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 1, e = VL.size(); i < e; ++i)
142369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (VL[i] != VL[0])
143369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return false;
144369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return true;
145369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
146369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
147c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines///\returns Opcode that can be clubbed with \p Op to create an alternate
148c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// sequence which can later be merged as a ShuffleVector instruction.
149c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic unsigned getAltOpcode(unsigned Op) {
150c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  switch (Op) {
151c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case Instruction::FAdd:
152c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return Instruction::FSub;
153c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case Instruction::FSub:
154c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return Instruction::FAdd;
155c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case Instruction::Add:
156c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return Instruction::Sub;
157c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case Instruction::Sub:
158c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return Instruction::Add;
159c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  default:
160c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return 0;
161c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  }
162c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines}
163c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
164c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines///\returns bool representing if Opcode \p Op can be part
165c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// of an alternate sequence which can later be merged as
166c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// a ShuffleVector instruction.
167c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic bool canCombineAsAltInst(unsigned Op) {
168f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  return Op == Instruction::FAdd || Op == Instruction::FSub ||
169f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar         Op == Instruction::Sub || Op == Instruction::Add;
170c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines}
171c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
172f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \returns ShuffleVector instruction if instructions in \p VL have
173c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines///  alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence.
174c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...)
175c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic unsigned isAltInst(ArrayRef<Value *> VL) {
176c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  Instruction *I0 = dyn_cast<Instruction>(VL[0]);
177c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  unsigned Opcode = I0->getOpcode();
178c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  unsigned AltOpcode = getAltOpcode(Opcode);
179c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  for (int i = 1, e = VL.size(); i < e; i++) {
180c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    Instruction *I = dyn_cast<Instruction>(VL[i]);
181c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode))
182c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return 0;
183c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  }
184c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  return Instruction::ShuffleVector;
185c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines}
186c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
187369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns The opcode if all of the Instructions in \p VL have the same
188369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// opcode, or zero.
189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic unsigned getSameOpcode(ArrayRef<Value *> VL) {
190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Instruction *I0 = dyn_cast<Instruction>(VL[0]);
191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (!I0)
192369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return 0;
193369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  unsigned Opcode = I0->getOpcode();
194369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (int i = 1, e = VL.size(); i < e; i++) {
195369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Instruction *I = dyn_cast<Instruction>(VL[i]);
196c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    if (!I || Opcode != I->getOpcode()) {
197c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (canCombineAsAltInst(Opcode) && i == 1)
198c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        return isAltInst(VL);
199369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return 0;
200c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
201369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
202369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return Opcode;
203369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
204369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
20537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Get the intersection (logical and) of all of the potential IR flags
20637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// of each scalar operation (VL) that will be converted into a vector (I).
20737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Flag set: NSW, NUW, exact, and all of fast-math.
20837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
20937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (auto *VecOp = dyn_cast<BinaryOperator>(I)) {
21037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (auto *Intersection = dyn_cast<BinaryOperator>(VL[0])) {
21137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Intersection is initialized to the 0th scalar,
21237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // so start counting from index '1'.
21337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      for (int i = 1, e = VL.size(); i < e; ++i) {
21437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (auto *Scalar = dyn_cast<BinaryOperator>(VL[i]))
21537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          Intersection->andIRFlags(Scalar);
21637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
21737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      VecOp->copyIRFlags(Intersection);
21837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
21937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
22037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
221fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling
222369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns The type that all of the values in \p VL have or null if there
223369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// are different types.
224369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic Type* getSameType(ArrayRef<Value *> VL) {
225369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Type *Ty = VL[0]->getType();
226369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (int i = 1, e = VL.size(); i < e; i++)
22730bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem    if (VL[i]->getType() != Ty)
228dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return nullptr;
229369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
230369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return Ty;
231369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
232369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
233de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// \returns True if Extract{Value,Element} instruction extracts element Idx.
234de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic bool matchExtractIndex(Instruction *E, unsigned Idx, unsigned Opcode) {
235de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  assert(Opcode == Instruction::ExtractElement ||
236de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar         Opcode == Instruction::ExtractValue);
237de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (Opcode == Instruction::ExtractElement) {
238369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1));
239de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return CI && CI->getZExtValue() == Idx;
240de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  } else {
241de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    ExtractValueInst *EI = cast<ExtractValueInst>(E);
242de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return EI->getNumIndices() == 1 && *EI->idx_begin() == Idx;
243369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
244369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
245369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
24637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// \returns True if in-tree use also needs extract. This refers to
24737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// possible scalar operand in vectorized instruction.
24837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
24937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                    TargetLibraryInfo *TLI) {
25037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
25137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  unsigned Opcode = UserInst->getOpcode();
25237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  switch (Opcode) {
25337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case Instruction::Load: {
25437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    LoadInst *LI = cast<LoadInst>(UserInst);
25537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return (LI->getPointerOperand() == Scalar);
25637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
25737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case Instruction::Store: {
25837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    StoreInst *SI = cast<StoreInst>(UserInst);
25937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return (SI->getPointerOperand() == Scalar);
26037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
26137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case Instruction::Call: {
26237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    CallInst *CI = cast<CallInst>(UserInst);
263de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
26437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (hasVectorInstrinsicScalarOpd(ID, 1)) {
26537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return (CI->getArgOperand(1) == Scalar);
26637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
26737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
26837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  default:
26937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return false;
27037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
27137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
27237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
273ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \returns the AA location that is being access by the instruction.
274f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic MemoryLocation getLocation(Instruction *I, AliasAnalysis *AA) {
275ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (StoreInst *SI = dyn_cast<StoreInst>(I))
2766948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar    return MemoryLocation::get(SI);
277ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (LoadInst *LI = dyn_cast<LoadInst>(I))
2786948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar    return MemoryLocation::get(LI);
279f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  return MemoryLocation();
280ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
281ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
282ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \returns True if the instruction is not a volatile or atomic load/store.
283ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic bool isSimple(Instruction *I) {
284ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (LoadInst *LI = dyn_cast<LoadInst>(I))
285ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return LI->isSimple();
286ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (StoreInst *SI = dyn_cast<StoreInst>(I))
287ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return SI->isSimple();
288ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
289ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return !MI->isVolatile();
290ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  return true;
291ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
292ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
293de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarnamespace llvm {
294de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarnamespace slpvectorizer {
295369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// Bottom Up SLP Vectorizer.
296369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemclass BoUpSLP {
297369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotempublic:
29853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  typedef SmallVector<Value *, 8> ValueList;
29953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  typedef SmallVector<Instruction *, 16> InstrList;
30053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  typedef SmallPtrSet<Value *, 16> ValueSet;
30153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  typedef SmallVector<StoreInst *, 8> StoreList;
30253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
3034c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
3044c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar          TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
305de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB,
306de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          const DataLayout *DL)
307ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func),
308de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), DB(DB),
309de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        DL(DL), Builder(Se->getContext()) {
310ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    CodeMetrics::collectEphemeralValues(F, AC, EphValues);
311de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // Use the vector register size specified by the target unless overridden
312de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // by a command-line option.
313de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // TODO: It would be better to limit the vectorization factor based on
314de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    //       data type rather than just register size. For example, x86 AVX has
315de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    //       256-bit registers, but it does not support integer operations
316de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    //       at that width (that requires AVX2).
317de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (MaxVectorRegSizeOption.getNumOccurrences())
318de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      MaxVecRegSize = MaxVectorRegSizeOption;
319de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    else
320de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      MaxVecRegSize = TTI->getRegisterBitWidth(true);
321de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
322de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    MinVecRegSize = MinVectorRegSizeOption;
32337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
32453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
32553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// \brief Vectorize the tree that starts with the elements in \p VL.
326a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// Returns the vectorized root.
327a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  Value *vectorizeTree();
32853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
32937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// \returns the cost incurred by unwanted spills and fills, caused by
33037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// holding live values over call sites.
33137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int getSpillCost();
33237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
33353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// \returns the vectorization cost of the subtree that starts at \p VL.
33453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// A negative number means that this is profitable.
335369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  int getTreeCost();
336369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
337dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
338dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// the purpose of scheduling and extraction in the \p UserIgnoreLst.
339dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void buildTree(ArrayRef<Value *> Roots,
340dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                 ArrayRef<Value *> UserIgnoreLst = None);
341369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
342369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// Clear the internal data structures that are created by 'buildTree'.
343369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  void deleteTree() {
344369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    VectorizableTree.clear();
345369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ScalarToTreeEntry.clear();
346369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    MustGather.clear();
347a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    ExternalUses.clear();
34837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    NumLoadsWantToKeepOrder = 0;
34937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    NumLoadsWantToChangeOrder = 0;
35037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    for (auto &Iter : BlocksSchedules) {
35137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      BlockScheduling *BS = Iter.second.get();
35237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      BS->clear();
35337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
354de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    MinBWs.clear();
355369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
35653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
357369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// \brief Perform LICM and CSE on the newly generated gather sequences.
358369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  void optimizeGatherSequence();
359c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
360f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  /// \returns true if it is beneficial to reverse the vector order.
36137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  bool shouldReorder() const {
36237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return NumLoadsWantToChangeOrder > NumLoadsWantToKeepOrder;
36337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
36437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
365de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// \return The vector element size in bits to use when vectorizing the
366de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// expression tree ending at \p V. If V is a store, the size is the width of
367de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// the stored value. Otherwise, the size is the width of the largest loaded
368de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// value reaching V. This method is used by the vectorizer to calculate
369de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// vectorization factors.
370de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned getVectorElementSize(Value *V);
371de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
372de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// Compute the minimum type sizes required to represent the entries in a
373de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// vectorizable tree.
374de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  void computeMinimumValueSizes();
375de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
376de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// \returns the maximum vector register size as set by TTI or overridden by cl::opt.
377de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned getMaxVecRegSize() const {
378de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return MaxVecRegSize;
379de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
380de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
381de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// \returns the minimum vector register size as set by cl::opt.
382de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned getMinVecRegSize() const {
383de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return MinVecRegSize;
384de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
385de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
386de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// \brief Check if ArrayType or StructType is isomorphic to some VectorType.
387de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  ///
388de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// \returns number of elements in vector if isomorphism exists, 0 otherwise.
389de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned canMapToVector(Type *T, const DataLayout &DL) const;
390de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
391369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemprivate:
392369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  struct TreeEntry;
39353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
394369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// \returns the cost of the vectorizable entry.
395369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  int getEntryCost(TreeEntry *E);
39653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
397369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// This is the recursive part of buildTree.
398369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth);
39953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
400de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// \returns True if the ExtractElement/ExtractValue instructions in VL can
401de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// be vectorized to use the original vector (or aggregate "bitcast" to a vector).
402de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  bool canReuseExtract(ArrayRef<Value *> VL, unsigned Opcode) const;
403de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
40462657090de3a5731bf644437701ccd78c247119fNadav Rotem  /// Vectorize a single entry in the tree.
405369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Value *vectorizeTree(TreeEntry *E);
406369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
40762657090de3a5731bf644437701ccd78c247119fNadav Rotem  /// Vectorize a single entry in the tree, starting in \p VL.
408369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Value *vectorizeTree(ArrayRef<Value *> VL);
40953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
41062657090de3a5731bf644437701ccd78c247119fNadav Rotem  /// \returns the pointer to the vectorized value if \p VL is already
41162657090de3a5731bf644437701ccd78c247119fNadav Rotem  /// vectorized, or NULL otherwise. This may happen in cycles.
4126a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault  Value *alreadyVectorized(ArrayRef<Value *> VL) const;
41362657090de3a5731bf644437701ccd78c247119fNadav Rotem
41453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// \returns the scalarization cost for this type. Scalarization in this
41553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// context means the creation of vectors from a group of scalars.
41653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  int getGatherCost(Type *Ty);
41753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
418d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem  /// \returns the scalarization cost for this list of values. Assuming that
419d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem  /// this subtree gets vectorized, we may need to extract the values from the
420d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem  /// roots. This method calculates the cost of extracting the values.
421d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem  int getGatherCost(ArrayRef<Value *> VL);
422d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem
4234b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  /// \brief Set the Builder insert point to one after the last instruction in
4244b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  /// the bundle
4254b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  void setInsertPointAfterBundle(ArrayRef<Value *> VL);
4264b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault
42753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// \returns a vector from a collection of scalars in \p VL.
42853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
42953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
430f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  /// \returns whether the VectorizableTree is fully vectorizable and will
431d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  /// be beneficial even if the tree height is tiny.
43236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool isFullyVectorizableTinyTree();
433d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang
434ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// Reorder commutative operands in alt shuffle if they result in
435ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// vectorized code.
436ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  void reorderAltShuffleOperands(ArrayRef<Value *> VL,
437ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                 SmallVectorImpl<Value *> &Left,
438ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                 SmallVectorImpl<Value *> &Right);
439ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// Reorder commutative operands to get better probability of
440ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// generating vectorized code.
441ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
442ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                      SmallVectorImpl<Value *> &Left,
443ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                      SmallVectorImpl<Value *> &Right);
444369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  struct TreeEntry {
44537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    TreeEntry() : Scalars(), VectorizedValue(nullptr),
446369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    NeedToGather(0) {}
44753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
448369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    /// \returns true if the scalars in VL are equal to this entry.
4496a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault    bool isSame(ArrayRef<Value *> VL) const {
450369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      assert(VL.size() == Scalars.size() && "Invalid size");
4516623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer      return std::equal(VL.begin(), VL.end(), Scalars.begin());
452369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
453369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
454369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    /// A vector of scalars.
455369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ValueList Scalars;
456369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
457369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    /// The Scalars are vectorized into this value. It is initialized to Null.
458369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Value *VectorizedValue;
459369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
460369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    /// Do we need to gather this sequence ?
461369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    bool NeedToGather;
462369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  };
46353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
464369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// Create a new VectorizableTree entry.
465369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized) {
4666948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar    VectorizableTree.emplace_back();
467369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    int idx = VectorizableTree.size() - 1;
468369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    TreeEntry *Last = &VectorizableTree[idx];
469369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
470369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Last->NeedToGather = !Vectorized;
471369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (Vectorized) {
472369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (int i = 0, e = VL.size(); i != e; ++i) {
473369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        assert(!ScalarToTreeEntry.count(VL[i]) && "Scalar already in tree!");
474369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ScalarToTreeEntry[VL[i]] = idx;
475369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
476369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    } else {
477369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      MustGather.insert(VL.begin(), VL.end());
478369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
479369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return Last;
480ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem  }
481de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
48253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// -- Vectorization State --
483369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// Holds all of the tree entries.
484369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  std::vector<TreeEntry> VectorizableTree;
48553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
486369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// Maps a specific scalar to its tree entry.
487369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  SmallDenseMap<Value*, int> ScalarToTreeEntry;
48853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
489369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  /// A list of scalars that we found that we need to keep as scalars.
49053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  ValueSet MustGather;
49153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
492a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  /// This POD struct describes one external user in the vectorized tree.
493a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  struct ExternalUser {
494a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    ExternalUser (Value *S, llvm::User *U, int L) :
495f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      Scalar(S), User(U), Lane(L){}
496a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // Which scalar in our function.
497a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    Value *Scalar;
498a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // Which user that uses the scalar.
499a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    llvm::User *User;
500a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // Which lane does the scalar belong to.
501a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    int Lane;
502a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  };
503a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  typedef SmallVector<ExternalUser, 16> UserList;
504a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
505ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// Checks if two instructions may access the same memory.
506ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  ///
507ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// \p Loc1 is the location of \p Inst1. It is passed explicitly because it
508ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// is invariant in the calling loop.
509f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1,
510ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                 Instruction *Inst2) {
511ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
512ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // First check if the result is already in the cache.
513ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AliasCacheKey key = std::make_pair(Inst1, Inst2);
514ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Optional<bool> &result = AliasCache[key];
515ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (result.hasValue()) {
516ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      return result.getValue();
517ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
518f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    MemoryLocation Loc2 = getLocation(Inst2, AA);
519ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    bool aliased = true;
520ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (Loc1.Ptr && Loc2.Ptr && isSimple(Inst1) && isSimple(Inst2)) {
521ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      // Do the alias check.
522ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      aliased = AA->alias(Loc1, Loc2);
523ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
524ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // Store the result in the cache.
525ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    result = aliased;
526ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return aliased;
527ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
528ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
529ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  typedef std::pair<Instruction *, Instruction *> AliasCacheKey;
530ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
531ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// Cache for alias results.
532ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// TODO: consider moving this to the AliasAnalysis itself.
533ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  DenseMap<AliasCacheKey, Optional<bool>> AliasCache;
534ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
535ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// Removes an instruction from its block and eventually deletes it.
536ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// It's like Instruction::eraseFromParent() except that the actual deletion
537ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// is delayed until BoUpSLP is destructed.
538ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// This is required to ensure that there are no incorrect collisions in the
539ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// AliasCache, which can happen if a new instruction is allocated at the
540ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// same address as a previously deleted instruction.
541ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  void eraseInstruction(Instruction *I) {
542ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    I->removeFromParent();
543ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    I->dropAllReferences();
544ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    DeletedInstructions.push_back(std::unique_ptr<Instruction>(I));
545ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
546ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
547ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// Temporary store for deleted instructions. Instructions will be deleted
548ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  /// eventually when the BoUpSLP is destructed.
549ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  SmallVector<std::unique_ptr<Instruction>, 8> DeletedInstructions;
550ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
551a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  /// A list of values that need to be extracted out of the tree.
552a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  /// This list holds pairs of (Internal Scalar : External User).
553a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  UserList ExternalUses;
554a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
55537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Values used only by @llvm.assume calls.
55637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  SmallPtrSet<const Value *, 32> EphValues;
55753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
55853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// Holds all of the instructions that we gathered.
55953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  SetVector<Instruction *> GatherSeq;
560a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling  /// A list of blocks that we are going to CSE.
56136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SetVector<BasicBlock *> CSEBlocks;
56253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
56337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Contains all scheduling relevant data for an instruction.
56437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// A ScheduleData either represents a single instruction or a member of an
56537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// instruction bundle (= a group of instructions which is combined into a
56637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// vector instruction).
56737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  struct ScheduleData {
56837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
56937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // The initial value for the dependency counters. It means that the
57037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // dependencies are not calculated yet.
57137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    enum { InvalidDeps = -1 };
57237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
57337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData()
57437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        : Inst(nullptr), FirstInBundle(nullptr), NextInBundle(nullptr),
57537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          NextLoadStore(nullptr), SchedulingRegionID(0), SchedulingPriority(0),
57637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          Dependencies(InvalidDeps), UnscheduledDeps(InvalidDeps),
57737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          UnscheduledDepsInBundle(InvalidDeps), IsScheduled(false) {}
57837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
57937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void init(int BlockSchedulingRegionID) {
58037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      FirstInBundle = this;
58137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      NextInBundle = nullptr;
58237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      NextLoadStore = nullptr;
58337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      IsScheduled = false;
58437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SchedulingRegionID = BlockSchedulingRegionID;
58537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      UnscheduledDepsInBundle = UnscheduledDeps;
58637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      clearDependencies();
58737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
58837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
58937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Returns true if the dependency information has been calculated.
59037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool hasValidDependencies() const { return Dependencies != InvalidDeps; }
59137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
59237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Returns true for single instructions and for bundle representatives
59337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// (= the head of a bundle).
59437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool isSchedulingEntity() const { return FirstInBundle == this; }
59537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
59637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Returns true if it represents an instruction bundle and not only a
59737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// single instruction.
59837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool isPartOfBundle() const {
59937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return NextInBundle != nullptr || FirstInBundle != this;
60037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
60137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
60237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Returns true if it is ready for scheduling, i.e. it has no more
60337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// unscheduled depending instructions/bundles.
60437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool isReady() const {
60537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      assert(isSchedulingEntity() &&
60637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines             "can't consider non-scheduling entity for ready list");
60737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return UnscheduledDepsInBundle == 0 && !IsScheduled;
60837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
60937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
61037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Modifies the number of unscheduled dependencies, also updating it for
61137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// the whole bundle.
61237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int incrementUnscheduledDeps(int Incr) {
61337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      UnscheduledDeps += Incr;
61437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return FirstInBundle->UnscheduledDepsInBundle += Incr;
61537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
61637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
61737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Sets the number of unscheduled dependencies to the number of
61837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// dependencies.
61937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void resetUnscheduledDeps() {
62037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      incrementUnscheduledDeps(Dependencies - UnscheduledDeps);
62137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
62237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
62337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Clears all dependency information.
62437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void clearDependencies() {
62537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Dependencies = InvalidDeps;
62637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      resetUnscheduledDeps();
62737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      MemoryDependencies.clear();
62837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
62937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
63037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void dump(raw_ostream &os) const {
63137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (!isSchedulingEntity()) {
63237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        os << "/ " << *Inst;
63337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      } else if (NextInBundle) {
63437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        os << '[' << *Inst;
63537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleData *SD = NextInBundle;
63637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        while (SD) {
63737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          os << ';' << *SD->Inst;
63837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          SD = SD->NextInBundle;
63937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
64037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        os << ']';
64137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      } else {
64237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        os << *Inst;
64337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
64437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
64553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
64637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Instruction *Inst;
64737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
64837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Points to the head in an instruction bundle (and always to this for
64937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// single instructions).
65037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *FirstInBundle;
65137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
65237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Single linked list of all instructions in a bundle. Null if it is a
65337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// single instruction.
65437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *NextInBundle;
65537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
65637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Single linked list of all memory instructions (e.g. load, store, call)
65737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// in the block - until the end of the scheduling region.
65837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *NextLoadStore;
65937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
66037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The dependent memory instructions.
66137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// This list is derived on demand in calculateDependencies().
66237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    SmallVector<ScheduleData *, 4> MemoryDependencies;
66337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
66437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// This ScheduleData is in the current scheduling region if this matches
66537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// the current SchedulingRegionID of BlockScheduling.
66637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int SchedulingRegionID;
66737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
66837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Used for getting a "good" final ordering of instructions.
66937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int SchedulingPriority;
67037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
67137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The number of dependencies. Constitutes of the number of users of the
67237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// instruction plus the number of dependent memory instructions (if any).
67337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// This value is calculated on demand.
67437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// If InvalidDeps, the number of dependencies is not calculated yet.
67537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ///
67637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int Dependencies;
67737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
67837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The number of dependencies minus the number of dependencies of scheduled
67937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// instructions. As soon as this is zero, the instruction/bundle gets ready
68037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// for scheduling.
68137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Note that this is negative as long as Dependencies is not calculated.
68237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int UnscheduledDeps;
68337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
68437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for
68537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// single instructions.
68637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int UnscheduledDepsInBundle;
68737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
68837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// True if this instruction is scheduled (or considered as scheduled in the
68937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// dry-run).
69037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool IsScheduled;
69137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  };
69237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
69337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#ifndef NDEBUG
694de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  friend inline raw_ostream &operator<<(raw_ostream &os,
695de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                        const BoUpSLP::ScheduleData &SD) {
696de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    SD.dump(os);
697de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return os;
698de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
69937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#endif
70037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
70137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Contains all scheduling data for a basic block.
70237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ///
70337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  struct BlockScheduling {
70437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
70537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BlockScheduling(BasicBlock *BB)
70637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize),
70737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          ScheduleStart(nullptr), ScheduleEnd(nullptr),
70837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr),
709f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar          ScheduleRegionSize(0),
710f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar          ScheduleRegionSizeLimit(ScheduleRegionSizeBudget),
71137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          // Make sure that the initial SchedulingRegionID is greater than the
71237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          // initial SchedulingRegionID in ScheduleData (which is 0).
71337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          SchedulingRegionID(1) {}
71437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
71537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void clear() {
71637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ReadyInsts.clear();
71737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleStart = nullptr;
71837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleEnd = nullptr;
71937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      FirstLoadStoreInRegion = nullptr;
72037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      LastLoadStoreInRegion = nullptr;
72137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
722f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // Reduce the maximum schedule region size by the size of the
723f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // previous scheduling run.
724f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      ScheduleRegionSizeLimit -= ScheduleRegionSize;
725f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      if (ScheduleRegionSizeLimit < MinScheduleRegionSize)
726f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        ScheduleRegionSizeLimit = MinScheduleRegionSize;
727f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      ScheduleRegionSize = 0;
728f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
72937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Make a new scheduling region, i.e. all existing ScheduleData is not
73037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // in the new region yet.
73137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++SchedulingRegionID;
73237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
73337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
73437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *getScheduleData(Value *V) {
73537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleData *SD = ScheduleDataMap[V];
73637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (SD && SD->SchedulingRegionID == SchedulingRegionID)
73737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        return SD;
73837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return nullptr;
73937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
74037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
74137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool isInSchedulingRegion(ScheduleData *SD) {
74237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return SD->SchedulingRegionID == SchedulingRegionID;
74337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
74437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
74537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Marks an instruction as scheduled and puts all dependent ready
74637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// instructions into the ready-list.
74737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    template <typename ReadyListType>
74837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void schedule(ScheduleData *SD, ReadyListType &ReadyList) {
74937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SD->IsScheduled = true;
75037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DEBUG(dbgs() << "SLP:   schedule " << *SD << "\n");
75137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
75237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleData *BundleMember = SD;
75337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      while (BundleMember) {
75437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Handle the def-use chain dependencies.
75537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        for (Use &U : BundleMember->Inst->operands()) {
75637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          ScheduleData *OpDef = getScheduleData(U.get());
75737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (OpDef && OpDef->hasValidDependencies() &&
75837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              OpDef->incrementUnscheduledDeps(-1) == 0) {
75937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // There are no more unscheduled dependencies after decrementing,
76037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // so we can put the dependent instruction into the ready list.
76137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ScheduleData *DepBundle = OpDef->FirstInBundle;
76237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            assert(!DepBundle->IsScheduled &&
76337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                   "already scheduled bundle gets ready");
76437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ReadyList.insert(DepBundle);
76537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            DEBUG(dbgs() << "SLP:    gets ready (def): " << *DepBundle << "\n");
76637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
76737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
76837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Handle the memory dependencies.
76937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
77037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (MemoryDepSD->incrementUnscheduledDeps(-1) == 0) {
77137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // There are no more unscheduled dependencies after decrementing,
77237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // so we can put the dependent instruction into the ready list.
77337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ScheduleData *DepBundle = MemoryDepSD->FirstInBundle;
77437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            assert(!DepBundle->IsScheduled &&
77537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                   "already scheduled bundle gets ready");
77637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ReadyList.insert(DepBundle);
77737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            DEBUG(dbgs() << "SLP:    gets ready (mem): " << *DepBundle << "\n");
77837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
77937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
78037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BundleMember = BundleMember->NextInBundle;
78137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
78237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
78337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
78437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Put all instructions into the ReadyList which are ready for scheduling.
78537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    template <typename ReadyListType>
78637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void initialFillReadyList(ReadyListType &ReadyList) {
78737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
78837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleData *SD = getScheduleData(I);
78937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (SD->isSchedulingEntity() && SD->isReady()) {
79037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          ReadyList.insert(SD);
79137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          DEBUG(dbgs() << "SLP:    initially in ready list: " << *I << "\n");
79237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
79337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
79437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
79537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
79637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Checks if a bundle of instructions can be scheduled, i.e. has no
79737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// cyclic dependencies. This is only a dry-run, no instructions are
79837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// actually moved at this stage.
799ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP);
80037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
80137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Un-bundles a group of instructions.
80237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void cancelScheduling(ArrayRef<Value *> VL);
80337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
80437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Extends the scheduling region so that V is inside the region.
805f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    /// \returns true if the region size is within the limit.
806f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    bool extendSchedulingRegion(Value *V);
80737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
80837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Initialize the ScheduleData structures for new instructions in the
80937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// scheduling region.
81037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void initScheduleData(Instruction *FromI, Instruction *ToI,
81137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                          ScheduleData *PrevLoadStore,
81237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                          ScheduleData *NextLoadStore);
81337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
81437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Updates the dependency information of a bundle and of all instructions/
81537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// bundles which depend on the original bundle.
81637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void calculateDependencies(ScheduleData *SD, bool InsertInReadyList,
817ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                               BoUpSLP *SLP);
81837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
81937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Sets all instruction in the scheduling region to un-scheduled.
82037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    void resetSchedule();
82137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
82237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BasicBlock *BB;
82337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
82437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Simple memory allocation for ScheduleData.
82537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    std::vector<std::unique_ptr<ScheduleData[]>> ScheduleDataChunks;
82637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
82737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The size of a ScheduleData array in ScheduleDataChunks.
82837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int ChunkSize;
82937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
83037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The allocator position in the current chunk, which is the last entry
83137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// of ScheduleDataChunks.
83237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int ChunkPos;
83337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
83437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Attaches ScheduleData to Instruction.
83537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// Note that the mapping survives during all vectorization iterations, i.e.
83637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// ScheduleData structures are recycled.
83737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DenseMap<Value *, ScheduleData *> ScheduleDataMap;
83837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
83937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    struct ReadyList : SmallVector<ScheduleData *, 8> {
84037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      void insert(ScheduleData *SD) { push_back(SD); }
84137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    };
84237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
84337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The ready-list for scheduling (only used for the dry-run).
84437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ReadyList ReadyInsts;
84537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
84637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The first instruction of the scheduling region.
84737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Instruction *ScheduleStart;
84837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
84937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The first instruction _after_ the scheduling region.
85037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Instruction *ScheduleEnd;
85137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
85237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The first memory accessing instruction in the scheduling region
85337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// (can be null).
85437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *FirstLoadStoreInRegion;
85537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
85637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The last memory accessing instruction in the scheduling region
85737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// (can be null).
85837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *LastLoadStoreInRegion;
85937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
860f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    /// The current size of the scheduling region.
861f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    int ScheduleRegionSize;
862de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
863f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    /// The maximum size allowed for the scheduling region.
864f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    int ScheduleRegionSizeLimit;
865f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
86637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// The ID of the scheduling region. For a new vectorization iteration this
86737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    /// is incremented which "removes" all ScheduleData from the region.
86837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    int SchedulingRegionID;
86937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  };
87037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
87137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Attaches the BlockScheduling structures to basic blocks.
872ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  MapVector<BasicBlock *, std::unique_ptr<BlockScheduling>> BlocksSchedules;
87337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
87437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// Performs the "real" scheduling. Done before vectorization is actually
87537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  /// performed in a basic block.
87637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  void scheduleBlock(BlockScheduling *BS);
877dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
878dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// List of users to ignore during scheduling and that don't need extracting.
879dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  ArrayRef<Value *> UserIgnoreList;
880a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
88137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Number of load-bundles, which contain consecutive loads.
88237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int NumLoadsWantToKeepOrder;
88337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
88437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Number of load-bundles of size 2, which are consecutive loads if reversed.
88537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int NumLoadsWantToChangeOrder;
88637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
88753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  // Analysis and block reference.
88853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Function *F;
88953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  ScalarEvolution *SE;
89053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  TargetTransformInfo *TTI;
891dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  TargetLibraryInfo *TLI;
89253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  AliasAnalysis *AA;
89353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  LoopInfo *LI;
894722b0a4d293b16eebaed94ae65d5f11743cbcea5Nadav Rotem  DominatorTree *DT;
895de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  AssumptionCache *AC;
896de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  DemandedBits *DB;
897de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  const DataLayout *DL;
898de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt.
899de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned MinVecRegSize; // Set by cl::opt (default: 128).
90053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  /// Instruction builder to construct the vectorized tree.
90153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  IRBuilder<> Builder;
902de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
903de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// A map of scalar integer values to the smallest bit width with which they
904de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// can legally be represented.
905de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  MapVector<Value *, uint64_t> MinBWs;
90653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem};
90753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
} // end namespace slpvectorizer
} // end namespace llvm
91037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
911dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesvoid BoUpSLP::buildTree(ArrayRef<Value *> Roots,
912dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                        ArrayRef<Value *> UserIgnoreLst) {
913369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  deleteTree();
914dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  UserIgnoreList = UserIgnoreLst;
91530bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem  if (!getSameType(Roots))
91630bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem    return;
917369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  buildTree_rec(Roots, 0);
918a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
919a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  // Collect the values that we need to extract from the tree.
920de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (TreeEntry &EIdx : VectorizableTree) {
921de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    TreeEntry *Entry = &EIdx;
922a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
923a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // For each lane:
924a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
925a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      Value *Scalar = Entry->Scalars[Lane];
926a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
927a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      // No need to handle users of gathered values.
928a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      if (Entry->NeedToGather)
929a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        continue;
930a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
93136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      for (User *U : Scalar->users()) {
93236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
933a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
93436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Instruction *UserInst = dyn_cast<Instruction>(U);
935a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        if (!UserInst)
936a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          continue;
937a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
93837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Skip in-tree scalars that become vectors
93937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (ScalarToTreeEntry.count(U)) {
94037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          int Idx = ScalarToTreeEntry[U];
94137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          TreeEntry *UseEntry = &VectorizableTree[Idx];
94237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          Value *UseScalar = UseEntry->Scalars[0];
94337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          // Some in-tree scalars will remain as scalar in vectorized
94437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          // instructions. If that is the case, the one in Lane 0 will
94537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          // be used.
94637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (UseScalar != U ||
94737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
94837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
94937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                         << ".\n");
95037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
95137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            continue;
95237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
95337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
95437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
955dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // Ignore users in the user ignore list.
956dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) !=
957dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            UserIgnoreList.end())
958a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem          continue;
959a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
96036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " <<
961a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem              Lane << " from " << *Scalar << ".\n");
96236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ExternalUses.push_back(ExternalUser(Scalar, U, Lane));
963a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      }
964a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    }
965a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  }
96653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
96753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
96853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
969369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemvoid BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
970de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  bool SameTy = allConstant(VL) || getSameType(VL); (void)SameTy;
971c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  bool isAltShuffle = false;
972369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  assert(SameTy && "Invalid types!");
97353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
974369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (Depth == RecursionMaxDepth) {
975369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
976369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    newTreeEntry(VL, false);
977369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return;
978369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
97953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
980369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Don't handle vectors.
981369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (VL[0]->getType()->isVectorTy()) {
982369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
983369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    newTreeEntry(VL, false);
984369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return;
985369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
98653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
987369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
988369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (SI->getValueOperand()->getType()->isVectorTy()) {
989369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
990369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, false);
991369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
992369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
993c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  unsigned Opcode = getSameOpcode(VL);
994c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
995c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // Check that this shuffle vector refers to the alternate
996c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // sequence of opcodes.
997c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  if (Opcode == Instruction::ShuffleVector) {
998c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    Instruction *I0 = dyn_cast<Instruction>(VL[0]);
999c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    unsigned Op = I0->getOpcode();
1000c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    if (Op != Instruction::ShuffleVector)
1001c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      isAltShuffle = true;
1002c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  }
100353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1004369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // If all of the operands are identical or constant we have a simple solution.
1005c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) {
1006369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
1007369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    newTreeEntry(VL, false);
1008369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return;
1009369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
101053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1011369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // We now know that this is a vector of instructions of the same type from
1012369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // the same block.
1013369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
101437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Don't vectorize ephemeral values.
101537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (unsigned i = 0, e = VL.size(); i != e; ++i) {
101637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (EphValues.count(VL[i])) {
101737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
101837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ") is ephemeral.\n");
101937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      newTreeEntry(VL, false);
102037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return;
102137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
102237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
102337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
1024369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Check if this is a duplicate of another entry.
1025369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (ScalarToTreeEntry.count(VL[0])) {
1026369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    int Idx = ScalarToTreeEntry[VL[0]];
1027369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    TreeEntry *E = &VectorizableTree[Idx];
1028369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    for (unsigned i = 0, e = VL.size(); i != e; ++i) {
1029369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n");
1030369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (E->Scalars[i] != VL[i]) {
1031369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
1032369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        newTreeEntry(VL, false);
1033369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        return;
1034369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1035369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1036369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *VL[0] << ".\n");
1037369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return;
1038369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
103953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1040369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Check that none of the instructions in the bundle are already in the tree.
1041369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 0, e = VL.size(); i != e; ++i) {
1042369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (ScalarToTreeEntry.count(VL[i])) {
1043369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
1044369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem            ") is already in tree.\n");
1045369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, false);
1046369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
1047369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1048369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
104953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1050ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // If any of the scalars is marked as a value that needs to stay scalar then
1051ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // we need to gather the scalars.
1052369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 0, e = VL.size(); i != e; ++i) {
1053ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (MustGather.count(VL[i])) {
1054ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
1055369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, false);
1056369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
1057369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1058369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
105953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1060369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Check that all of the users of the scalars that we want to vectorize are
1061369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // schedulable.
1062369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Instruction *VL0 = cast<Instruction>(VL[0]);
1063369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  BasicBlock *BB = cast<Instruction>(VL0)->getParent();
106453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
106537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (!DT->isReachableFromEntry(BB)) {
106637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Don't go into unreachable blocks. They may contain instructions with
106737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // dependency cycles which confuse the final scheduling.
106837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
106937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    newTreeEntry(VL, false);
107037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return;
107153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
1072de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
1073369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Check that every instruction appears once in this bundle.
107453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  for (unsigned i = 0, e = VL.size(); i < e; ++i)
1075369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    for (unsigned j = i+1; j < e; ++j)
1076369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (VL[i] == VL[j]) {
1077369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
1078369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        newTreeEntry(VL, false);
1079369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        return;
1080369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
108153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
108237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  auto &BSRef = BlocksSchedules[BB];
108337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (!BSRef) {
108437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BSRef = llvm::make_unique<BlockScheduling>(BB);
108553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
108637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BlockScheduling &BS = *BSRef.get();
108753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1088ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (!BS.tryScheduleBundle(VL, this)) {
108937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
1090f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    assert((!BS.getScheduleData(VL[0]) ||
1091f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar            !BS.getScheduleData(VL[0])->isPartOfBundle()) &&
1092f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar           "tryScheduleBundle should cancelScheduling on failure");
109337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    newTreeEntry(VL, false);
109437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return;
109553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
109637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
109753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1098369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  switch (Opcode) {
1099369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PHI: {
1100369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      PHINode *PH = dyn_cast<PHINode>(VL0);
11013c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer
11023c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer      // Check for terminator values (e.g. invoke).
11033c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer      for (unsigned j = 0; j < VL.size(); ++j)
11043c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer        for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
110536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          TerminatorInst *Term = dyn_cast<TerminatorInst>(
110636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines              cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i)));
11073c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer          if (Term) {
11083c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer            DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
110937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            BS.cancelScheduling(VL);
11103c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer            newTreeEntry(VL, false);
11113c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer            return;
11123c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer          }
11133c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer        }
11143c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer
1115369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1116369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
1117369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1118369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
1119369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ValueList Operands;
1120369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        // Prepare the operand vector.
1121de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        for (Value *j : VL)
1122de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
112336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines              PH->getIncomingBlock(i)));
1124369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1125369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        buildTree_rec(Operands, Depth + 1);
1126369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1127369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
1128369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1129de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    case Instruction::ExtractValue:
1130369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ExtractElement: {
1131de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      bool Reuse = canReuseExtract(VL, Opcode);
1132369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (Reuse) {
1133369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        DEBUG(dbgs() << "SLP: Reusing extract sequence.\n");
113437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      } else {
113537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BS.cancelScheduling(VL);
1136369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1137369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, Reuse);
1138369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
1139369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1140369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Load: {
1141f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // Check that a vectorized load would load the same memory as a scalar
1142f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // load.
1143f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // For example, we don't want to vectorize loads that are smaller than 8 bits.
1144f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // Even though we have a packed struct {<i2, i2, i2, i2>} LLVM treats
1145f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // loading/storing it as an i8 struct. If we vectorize loads/stores from
1146f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // such a struct we read/write packed bits disagreeing with the
1147f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // unvectorized version.
1148f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      Type *ScalarTy = VL[0]->getType();
1149f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
1150de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (DL->getTypeSizeInBits(ScalarTy) !=
1151de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          DL->getTypeAllocSizeInBits(ScalarTy)) {
1152f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        BS.cancelScheduling(VL);
1153f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        newTreeEntry(VL, false);
1154f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
1155f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        return;
1156f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      }
1157369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Check if the loads are consecutive or if we need to swizzle them.
1158fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer      for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
1159fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer        LoadInst *L = cast<LoadInst>(VL[i]);
116037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (!L->isSimple()) {
116137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1162369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          newTreeEntry(VL, false);
116337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
116437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          return;
116537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
1166f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
1167de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
1168de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], *DL, *SE)) {
116937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ++NumLoadsWantToChangeOrder;
117037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
117137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
117237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          newTreeEntry(VL, false);
117337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
1174369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          return;
1175369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        }
1176fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer      }
117737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumLoadsWantToKeepOrder;
1178369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1179369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of loads.\n");
1180369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
1181369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1182369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ZExt:
1183369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SExt:
1184369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToUI:
1185369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToSI:
1186369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPExt:
1187369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PtrToInt:
1188369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::IntToPtr:
1189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SIToFP:
1190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UIToFP:
1191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Trunc:
1192369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPTrunc:
1193369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::BitCast: {
1194369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Type *SrcTy = VL0->getOperand(0)->getType();
1195369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0; i < VL.size(); ++i) {
1196369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
1197ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        if (Ty != SrcTy || !isValidElementType(Ty)) {
119837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1199369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          newTreeEntry(VL, false);
1200369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
1201369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          return;
1202369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        }
1203369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1204369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1205369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of casts.\n");
120653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1207369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
1208369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ValueList Operands;
1209369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        // Prepare the operand vector.
1210de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        for (Value *j : VL)
1211de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Operands.push_back(cast<Instruction>(j)->getOperand(i));
121253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1213369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        buildTree_rec(Operands, Depth+1);
1214369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
121553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      return;
1216369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
1217369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ICmp:
1218369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FCmp: {
1219369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Check that all of the compares have the same predicate.
12200c7f116bb6950ef819323d855415b2f2b0aad987Pirama Arumuga Nainar      CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
1221135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem      Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType();
1222369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 1, e = VL.size(); i < e; ++i) {
1223369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        CmpInst *Cmp = cast<CmpInst>(VL[i]);
1224135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem        if (Cmp->getPredicate() != P0 ||
1225135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem            Cmp->getOperand(0)->getType() != ComparedTy) {
122637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1227369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          newTreeEntry(VL, false);
1228369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
1229369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          return;
1230369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        }
1231369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
123253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1233369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1234369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of compares.\n");
123553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1236369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
1237369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ValueList Operands;
1238369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        // Prepare the operand vector.
1239de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        for (Value *j : VL)
1240de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Operands.push_back(cast<Instruction>(j)->getOperand(i));
124153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1242369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        buildTree_rec(Operands, Depth+1);
1243805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem      }
1244369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
124553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1246369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Select:
1247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Add:
1248369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FAdd:
1249369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Sub:
1250369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FSub:
1251369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Mul:
1252369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FMul:
1253369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UDiv:
1254369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SDiv:
1255369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FDiv:
1256369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::URem:
1257369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SRem:
1258369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FRem:
1259369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Shl:
1260369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::LShr:
1261369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::AShr:
1262369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::And:
1263369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Or:
1264369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Xor: {
1265369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1266369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
1267369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1268af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      // Sort operands of the instructions so that each side is more likely to
1269af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      // have the same opcode.
1270af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
1271af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer        ValueList Left, Right;
1272af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer        reorderInputsAccordingToOpcode(VL, Left, Right);
127337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        buildTree_rec(Left, Depth + 1);
127437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        buildTree_rec(Right, Depth + 1);
1275af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer        return;
1276af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      }
1277af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer
1278369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
1279369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ValueList Operands;
1280369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        // Prepare the operand vector.
1281de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        for (Value *j : VL)
1282de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Operands.push_back(cast<Instruction>(j)->getOperand(i));
1283369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1284369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        buildTree_rec(Operands, Depth+1);
1285369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1286369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
128753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1288c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::GetElementPtr: {
1289c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // We don't combine GEPs with complicated (nested) indexing.
1290c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned j = 0; j < VL.size(); ++j) {
1291c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
1292c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
129337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1294c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          newTreeEntry(VL, false);
1295c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          return;
1296c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        }
1297c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1298c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1299c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // We can't combine several GEPs into one vector if they operate on
1300c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // different types.
1301c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
1302c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned j = 0; j < VL.size(); ++j) {
1303c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
1304c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (Ty0 != CurTy) {
1305c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
130637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1307c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          newTreeEntry(VL, false);
1308c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          return;
1309c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        }
1310c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1311c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1312c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // We don't combine GEPs with non-constant indexes.
1313c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned j = 0; j < VL.size(); ++j) {
1314c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        auto Op = cast<Instruction>(VL[j])->getOperand(1);
1315c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (!isa<ConstantInt>(Op)) {
1316c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          DEBUG(
1317c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines              dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
131837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1319c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          newTreeEntry(VL, false);
1320c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          return;
1321c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        }
1322c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1323c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1324c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      newTreeEntry(VL, true);
1325c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
1326c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned i = 0, e = 2; i < e; ++i) {
1327c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        ValueList Operands;
1328c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // Prepare the operand vector.
1329de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        for (Value *j : VL)
1330de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Operands.push_back(cast<Instruction>(j)->getOperand(i));
1331c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1332c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        buildTree_rec(Operands, Depth + 1);
1333c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1334c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return;
1335c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
1336369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Store: {
1337369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Check if the stores are consecutive or if we need to swizzle them.
1338369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
1339de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
134037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
1341369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          newTreeEntry(VL, false);
134236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
1343369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          return;
1344369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        }
1345805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
1346369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, true);
1347369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: added a vector of stores.\n");
1348805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
1349805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem      ValueList Operands;
1350de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      for (Value *j : VL)
1351de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        Operands.push_back(cast<Instruction>(j)->getOperand(0));
1352805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
1353369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      buildTree_rec(Operands, Depth + 1);
135453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      return;
135553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
135636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    case Instruction::Call: {
135736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Check if the calls are all to the same vectorizable intrinsic.
1358dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      CallInst *CI = cast<CallInst>(VL[0]);
1359dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // Check if this is an Intrinsic call or something that can be
1360dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // represented by an intrinsic call
1361de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
1362dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (!isTriviallyVectorizable(ID)) {
136337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BS.cancelScheduling(VL);
136436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        newTreeEntry(VL, false);
136536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
136636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        return;
136736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
1368dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Function *Int = CI->getCalledFunction();
1369c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *A1I = nullptr;
1370c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (hasVectorInstrinsicScalarOpd(ID, 1))
1371c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        A1I = CI->getArgOperand(1);
137236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      for (unsigned i = 1, e = VL.size(); i != e; ++i) {
1373dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
1374dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        if (!CI2 || CI2->getCalledFunction() != Int ||
1375de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
1376de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            !CI->hasIdenticalOperandBundleSchema(*CI2)) {
137737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          BS.cancelScheduling(VL);
137836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          newTreeEntry(VL, false);
1379dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
138036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                       << "\n");
138136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          return;
138236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
1383c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // ctlz,cttz and powi are special intrinsics whose second argument
1384c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // should be same in order for them to be vectorized.
1385c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (hasVectorInstrinsicScalarOpd(ID, 1)) {
1386c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          Value *A1J = CI2->getArgOperand(1);
1387c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          if (A1I != A1J) {
138837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            BS.cancelScheduling(VL);
1389c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines            newTreeEntry(VL, false);
1390c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines            DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
1391c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines                         << " argument "<< A1I<<"!=" << A1J
1392c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines                         << "\n");
1393c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines            return;
1394c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          }
1395c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        }
1396de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        // Verify that the bundle operands are identical between the two calls.
1397de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (CI->hasOperandBundles() &&
1398de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            !std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(),
1399de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                        CI->op_begin() + CI->getBundleOperandsEndIndex(),
1400de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                        CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
1401de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          BS.cancelScheduling(VL);
1402de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          newTreeEntry(VL, false);
1403de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!="
1404de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                       << *VL[i] << '\n');
1405de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          return;
1406de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        }
140736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
140836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
140936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      newTreeEntry(VL, true);
1410dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
141136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ValueList Operands;
141236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // Prepare the operand vector.
1413de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        for (Value *j : VL) {
1414de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          CallInst *CI2 = dyn_cast<CallInst>(j);
1415dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          Operands.push_back(CI2->getArgOperand(i));
141636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
141736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        buildTree_rec(Operands, Depth + 1);
141836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
141936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return;
142036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
1421c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::ShuffleVector: {
1422c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // If this is not an alternate sequence of opcode like add-sub
1423c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // then do not vectorize this instruction.
1424c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (!isAltShuffle) {
142537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BS.cancelScheduling(VL);
1426c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        newTreeEntry(VL, false);
1427c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
1428c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        return;
1429c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1430c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      newTreeEntry(VL, true);
1431c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
1432ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1433ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      // Reorder operands if reordering would enable vectorization.
1434ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (isa<BinaryOperator>(VL0)) {
1435ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        ValueList Left, Right;
1436ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        reorderAltShuffleOperands(VL, Left, Right);
1437ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        buildTree_rec(Left, Depth + 1);
1438ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        buildTree_rec(Right, Depth + 1);
1439ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        return;
1440ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      }
1441ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1442c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
1443c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        ValueList Operands;
1444c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // Prepare the operand vector.
1445de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        for (Value *j : VL)
1446de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Operands.push_back(cast<Instruction>(j)->getOperand(i));
1447c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1448c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        buildTree_rec(Operands, Depth + 1);
1449c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1450c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return;
1451c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
1452369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    default:
145337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      BS.cancelScheduling(VL);
1454369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      newTreeEntry(VL, false);
1455369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
1456369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return;
145753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
145853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
145953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1460de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
1461de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned N;
1462de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  Type *EltTy;
1463de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *ST = dyn_cast<StructType>(T);
1464de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (ST) {
1465de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    N = ST->getNumElements();
1466de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    EltTy = *ST->element_begin();
1467de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  } else {
1468de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    N = cast<ArrayType>(T)->getNumElements();
1469de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    EltTy = cast<ArrayType>(T)->getElementType();
1470de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
1471de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (!isValidElementType(EltTy))
1472de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return 0;
1473de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  uint64_t VTSize = DL.getTypeStoreSizeInBits(VectorType::get(EltTy, N));
1474de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T))
1475de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return 0;
1476de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (ST) {
1477de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // Check that struct is homogeneous.
1478de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    for (const auto *Ty : ST->elements())
1479de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (Ty != EltTy)
1480de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        return 0;
1481de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
1482de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  return N;
1483de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar}
1484de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
1485de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, unsigned Opcode) const {
1486de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  assert(Opcode == Instruction::ExtractElement ||
1487de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar         Opcode == Instruction::ExtractValue);
1488de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  assert(Opcode == getSameOpcode(VL) && "Invalid opcode");
1489de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Check if all of the extracts come from the same vector and from the
1490de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // correct offset.
1491de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  Value *VL0 = VL[0];
1492de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  Instruction *E0 = cast<Instruction>(VL0);
1493de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  Value *Vec = E0->getOperand(0);
1494de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
1495de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // We have to extract from a vector/aggregate with the same number of elements.
1496de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned NElts;
1497de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (Opcode == Instruction::ExtractValue) {
1498de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    const DataLayout &DL = E0->getModule()->getDataLayout();
1499de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    NElts = canMapToVector(Vec->getType(), DL);
1500de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!NElts)
1501de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      return false;
1502de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // Check if load can be rewritten as load of vector.
1503de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    LoadInst *LI = dyn_cast<LoadInst>(Vec);
1504de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!LI || !LI->isSimple() || !LI->hasNUses(VL.size()))
1505de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      return false;
1506de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  } else {
1507de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    NElts = Vec->getType()->getVectorNumElements();
1508de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
1509de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
1510de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (NElts != VL.size())
1511de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return false;
1512de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
1513de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Check that all of the indices extract from the correct offset.
1514de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (!matchExtractIndex(E0, 0, Opcode))
1515de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return false;
1516de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
1517de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (unsigned i = 1, e = VL.size(); i < e; ++i) {
1518de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Instruction *E = cast<Instruction>(VL[i]);
1519de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!matchExtractIndex(E, i, Opcode))
1520de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      return false;
1521de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (E->getOperand(0) != Vec)
1522de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      return false;
1523de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
1524de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
1525de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  return true;
1526de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar}
1527de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
1528369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getEntryCost(TreeEntry *E) {
1529369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  ArrayRef<Value*> VL = E->Scalars;
153053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
153153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Type *ScalarTy = VL[0]->getType();
153253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
153353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    ScalarTy = SI->getValueOperand()->getType();
153425961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
153525961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem
1536de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If we have computed a smaller type for the expression, update VecTy so
1537de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // that the costs will be accurate.
1538de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (MinBWs.count(VL[0]))
1539de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    VecTy = VectorType::get(IntegerType::get(F->getContext(), MinBWs[VL[0]]),
1540de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                            VL.size());
1541de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
1542369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (E->NeedToGather) {
1543369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (allConstant(VL))
1544369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return 0;
1545369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (isSplat(VL)) {
1546369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
154753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1548369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return getGatherCost(E->Scalars);
154953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
1550c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  unsigned Opcode = getSameOpcode(VL);
1551c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL");
155253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Instruction *VL0 = cast<Instruction>(VL[0]);
155353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  switch (Opcode) {
1554369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PHI: {
155553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      return 0;
155653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1557de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    case Instruction::ExtractValue:
1558369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ExtractElement: {
1559de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (canReuseExtract(VL, Opcode)) {
156036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        int DeadCost = 0;
156136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        for (unsigned i = 0, e = VL.size(); i < e; ++i) {
1562de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Instruction *E = cast<Instruction>(VL[i]);
156336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          if (E->hasOneUse())
156436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            // Take credit for instruction that will become dead.
156536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            DeadCost +=
156636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
156736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
156836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        return -DeadCost;
156936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
1570369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return getGatherCost(VecTy);
157125961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem    }
1572369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ZExt:
1573369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SExt:
1574369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToUI:
1575369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToSI:
1576369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPExt:
1577369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PtrToInt:
1578369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::IntToPtr:
1579369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SIToFP:
1580369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UIToFP:
1581369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Trunc:
1582369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPTrunc:
1583369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::BitCast: {
1584369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Type *SrcTy = VL0->getOperand(0)->getType();
1585369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1586369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Calculate the cost of this instruction.
1587369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
1588369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem                                                         VL0->getType(), SrcTy);
1589369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1590369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
1591369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
1592369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return VecCost - ScalarCost;
159353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1594369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FCmp:
1595369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ICmp:
1596de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    case Instruction::Select: {
1597de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // Calculate the cost of this instruction.
1598de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
1599de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      int ScalarCost = VecTy->getNumElements() *
1600de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
1601de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      int VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
1602de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      return VecCost - ScalarCost;
1603de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    }
1604369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Add:
1605369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FAdd:
1606369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Sub:
1607369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FSub:
1608369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Mul:
1609369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FMul:
1610369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UDiv:
1611369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SDiv:
1612369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FDiv:
1613369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::URem:
1614369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SRem:
1615369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FRem:
1616369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Shl:
1617369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::LShr:
1618369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::AShr:
1619369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::And:
1620369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Or:
1621369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Xor: {
1622de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // Certain instructions can be cheaper to vectorize if they have a
1623de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // constant second vector operand.
1624de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      TargetTransformInfo::OperandValueKind Op1VK =
1625de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          TargetTransformInfo::OK_AnyValue;
1626de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      TargetTransformInfo::OperandValueKind Op2VK =
1627de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          TargetTransformInfo::OK_UniformConstantValue;
1628de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      TargetTransformInfo::OperandValueProperties Op1VP =
1629de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          TargetTransformInfo::OP_None;
1630de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      TargetTransformInfo::OperandValueProperties Op2VP =
1631de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          TargetTransformInfo::OP_None;
1632de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
1633de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // If all operands are exactly the same ConstantInt then set the
1634de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // operand kind to OK_UniformConstantValue.
1635de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // If instead not all operands are constants, then set the operand kind
1636de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // to OK_AnyValue. If all operands are constants but not the same,
1637de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // then set the operand kind to OK_NonUniformConstantValue.
1638de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      ConstantInt *CInt = nullptr;
1639de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      for (unsigned i = 0; i < VL.size(); ++i) {
1640de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        const Instruction *I = cast<Instruction>(VL[i]);
1641de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (!isa<ConstantInt>(I->getOperand(1))) {
1642de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Op2VK = TargetTransformInfo::OK_AnyValue;
1643de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          break;
164436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
1645de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (i == 0) {
1646de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          CInt = cast<ConstantInt>(I->getOperand(1));
1647de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          continue;
1648de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        }
1649de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (Op2VK == TargetTransformInfo::OK_UniformConstantValue &&
1650de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            CInt != cast<ConstantInt>(I->getOperand(1)))
1651de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
1652369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
1653de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // FIXME: Currently cost of model modification for division by power of
1654de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // 2 is handled for X86 and AArch64. Add support for other targets.
1655de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && CInt &&
1656de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          CInt->getValue().isPowerOf2())
1657de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        Op2VP = TargetTransformInfo::OP_PowerOf2;
1658de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
1659de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      int ScalarCost = VecTy->getNumElements() *
1660de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                       TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK,
1661de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                                   Op2VK, Op1VP, Op2VP);
1662de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK,
1663de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                                Op1VP, Op2VP);
1664369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return VecCost - ScalarCost;
166553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1666c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::GetElementPtr: {
1667c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      TargetTransformInfo::OperandValueKind Op1VK =
1668c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TargetTransformInfo::OK_AnyValue;
1669c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      TargetTransformInfo::OperandValueKind Op2VK =
1670c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TargetTransformInfo::OK_UniformConstantValue;
1671c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1672c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      int ScalarCost =
1673c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          VecTy->getNumElements() *
1674c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
1675c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      int VecCost =
1676c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
1677c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
1678c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return VecCost - ScalarCost;
1679c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
1680369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Load: {
1681369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Cost of wide load - cost of scalar loads.
1682de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      unsigned alignment = dyn_cast<LoadInst>(VL0)->getAlignment();
1683369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      int ScalarLdCost = VecTy->getNumElements() *
1684de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0);
1685de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
1686de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                           VecTy, alignment, 0);
1687369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return VecLdCost - ScalarLdCost;
168853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
1689369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Store: {
1690369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // We know that we can merge the stores. Calculate the cost.
1691de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      unsigned alignment = dyn_cast<StoreInst>(VL0)->getAlignment();
1692369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      int ScalarStCost = VecTy->getNumElements() *
1693de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0);
1694de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
1695de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                           VecTy, alignment, 0);
1696369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return VecStCost - ScalarStCost;
169725961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem    }
169836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    case Instruction::Call: {
169936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      CallInst *CI = cast<CallInst>(VL0);
1700de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
170136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
170236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Calculate the cost of the scalar and vector calls.
170336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      SmallVector<Type*, 4> ScalarTys, VecTys;
1704dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) {
170536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ScalarTys.push_back(CI->getArgOperand(op)->getType());
170636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
170736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                         VecTy->getNumElements()));
170836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
170936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
1710de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      FastMathFlags FMF;
1711de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
1712de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        FMF = FPMO->getFastMathFlags();
1713de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
171436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      int ScalarCallCost = VecTy->getNumElements() *
1715de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
171636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
1717de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys, FMF);
171836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
171936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
172036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            << " (" << VecCallCost  << "-" <<  ScalarCallCost << ")"
1721dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            << " for " << *CI << "\n");
172236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
172336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return VecCallCost - ScalarCallCost;
172436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
1725c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::ShuffleVector: {
1726c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      TargetTransformInfo::OperandValueKind Op1VK =
1727c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TargetTransformInfo::OK_AnyValue;
1728c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      TargetTransformInfo::OperandValueKind Op2VK =
1729c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TargetTransformInfo::OK_AnyValue;
1730c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      int ScalarCost = 0;
1731c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      int VecCost = 0;
1732de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      for (Value *i : VL) {
1733de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        Instruction *I = cast<Instruction>(i);
1734c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (!I)
1735c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          break;
1736c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        ScalarCost +=
1737c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines            TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK);
1738c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
1739c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // VecCost is equal to sum of the cost of creating 2 vectors
1740c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // and the cost of creating shuffle.
1741c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Instruction *I0 = cast<Instruction>(VL[0]);
1742c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      VecCost =
1743c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK);
1744c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Instruction *I1 = cast<Instruction>(VL[1]);
1745c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      VecCost +=
1746c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK);
1747c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      VecCost +=
1748c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0);
1749c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return VecCost - ScalarCost;
1750c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
1751369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    default:
1752369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      llvm_unreachable("Unknown instruction");
175353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
1754369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
175525961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem
1756d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiangbool BoUpSLP::isFullyVectorizableTinyTree() {
1757d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  DEBUG(dbgs() << "SLP: Check whether the tree with height " <<
1758d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang        VectorizableTree.size() << " is fully vectorizable .\n");
1759d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang
1760d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  // We only handle trees of height 2.
1761d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  if (VectorizableTree.size() != 2)
1762d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang    return false;
1763d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang
1764f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // Handle splat and all-constants stores.
1765f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (!VectorizableTree[0].NeedToGather &&
1766f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      (allConstant(VectorizableTree[1].Scalars) ||
1767f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar       isSplat(VectorizableTree[1].Scalars)))
176836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
176936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
1770d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  // Gathering cost would be too much for tiny trees.
177136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
177236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return false;
1773d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang
177436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return true;
1775d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang}
1776d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang
177737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesint BoUpSLP::getSpillCost() {
177837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Walk from the bottom of the tree to the top, tracking which values are
177937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // live. When we see a call instruction that is not part of our tree,
178037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // query TTI to see if there is a cost to keeping values live over it
178137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // (for example, if spills and fills are required).
178237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  unsigned BundleWidth = VectorizableTree.front().Scalars.size();
178337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int Cost = 0;
178437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
178537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  SmallPtrSet<Instruction*, 4> LiveValues;
1786de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  Instruction *PrevInst = nullptr;
178737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
1788de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (const auto &N : VectorizableTree) {
1789de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Instruction *Inst = dyn_cast<Instruction>(N.Scalars[0]);
179037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (!Inst)
179137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      continue;
179237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
179337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (!PrevInst) {
179437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      PrevInst = Inst;
179537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      continue;
179637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
179737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
1798de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // Update LiveValues.
1799de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    LiveValues.erase(PrevInst);
1800de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    for (auto &J : PrevInst->operands()) {
1801de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J))
1802de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        LiveValues.insert(cast<Instruction>(&*J));
1803de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    }
1804de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
180537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DEBUG(
180637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      dbgs() << "SLP: #LV: " << LiveValues.size();
180737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      for (auto *X : LiveValues)
180837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        dbgs() << " " << X->getName();
180937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      dbgs() << ", Looking at ";
181037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Inst->dump();
181137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      );
181237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
181337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Now find the sequence of instructions between PrevInst and Inst.
1814f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    BasicBlock::reverse_iterator InstIt(Inst->getIterator()),
1815f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        PrevInstIt(PrevInst->getIterator());
181637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    --PrevInstIt;
181737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    while (InstIt != PrevInstIt) {
181837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (PrevInstIt == PrevInst->getParent()->rend()) {
181937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        PrevInstIt = Inst->getParent()->rbegin();
182037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        continue;
182137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
182237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
182337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (isa<CallInst>(&*PrevInstIt) && &*PrevInstIt != PrevInst) {
182437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        SmallVector<Type*, 4> V;
182537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        for (auto *II : LiveValues)
182637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          V.push_back(VectorType::get(II->getType(), BundleWidth));
182737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        Cost += TTI->getCostOfKeepingLiveOverCall(V);
182837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
182937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
183037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++PrevInstIt;
183137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
183237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
183337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    PrevInst = Inst;
183437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
183537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
183637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return Cost;
183737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
183837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
1839369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getTreeCost() {
1840369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  int Cost = 0;
1841369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
1842369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        VectorizableTree.size() << ".\n");
1843369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
1844d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  // We only vectorize tiny trees if it is fully vectorizable.
1845d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang  if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) {
1846ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (VectorizableTree.empty()) {
184767a38a2875f05ea9c219ab73c4398ee675eb4292Nadav Rotem      assert(!ExternalUses.size() && "We should not have any external users");
184867a38a2875f05ea9c219ab73c4398ee675eb4292Nadav Rotem    }
1849085e23841e9c4f4682385fce456704a5f75f9cdcYi Jiang    return INT_MAX;
1850a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  }
1851a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
1852a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  unsigned BundleWidth = VectorizableTree[0].Scalars.size();
1853a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
1854de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (TreeEntry &TE : VectorizableTree) {
1855de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    int C = getEntryCost(&TE);
1856369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with "
1857de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                 << *TE.Scalars[0] << ".\n");
1858369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Cost += C;
185953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
1860a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
186136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SmallSet<Value *, 16> ExtractCostCalculated;
1862a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  int ExtractCost = 0;
1863de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (ExternalUser &EU : ExternalUses) {
186436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // We only add extract cost once for the same scalar.
1865de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!ExtractCostCalculated.insert(EU.Scalar).second)
186637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      continue;
186737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
186837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Uses by ephemeral values are free (because the ephemeral value will be
186937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // removed prior to code generation, and so the extraction will be
187037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // removed as well).
1871de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (EphValues.count(EU.User))
187236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      continue;
1873a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
1874de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // If we plan to rewrite the tree in a smaller type, we will need to sign
1875de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // extend the extracted value back to the original type. Here, we account
1876de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // for the extract and the added cost of the sign extend if needed.
1877de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
1878de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *ScalarRoot = VectorizableTree[0].Scalars[0];
1879de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (MinBWs.count(ScalarRoot)) {
1880de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]);
1881de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      VecTy = VectorType::get(MinTy, BundleWidth);
1882de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      ExtractCost += TTI->getExtractWithExtendCost(
1883de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Instruction::SExt, EU.Scalar->getType(), VecTy, EU.Lane);
1884de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    } else {
1885de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      ExtractCost +=
1886de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, EU.Lane);
1887de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    }
1888a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  }
1889a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
1890de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  int SpillCost = getSpillCost();
1891de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  Cost += SpillCost + ExtractCost;
189237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
1893de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  DEBUG(dbgs() << "SLP: Spill Cost = " << SpillCost << ".\n"
1894de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar               << "SLP: Extract Cost = " << ExtractCost << ".\n"
1895de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar               << "SLP: Total Cost = " << Cost << ".\n");
1896de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  return Cost;
1897369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
189853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1899369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getGatherCost(Type *Ty) {
1900369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  int Cost = 0;
1901369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
1902369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
1903369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return Cost;
1904369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
190553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1906369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
1907369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Find the type of the operands in VL.
1908369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  Type *ScalarTy = VL[0]->getType();
1909369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
1910369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ScalarTy = SI->getValueOperand()->getType();
1911369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
1912369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Find the cost of inserting/extracting values from the vector.
1913369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return getGatherCost(VecTy);
191453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
191553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
1916ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Reorder commutative operations in alternate shuffle if the resulting vectors
1917ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// are consecutive loads. This would allow us to vectorize the tree.
1918ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// If we have something like-
1919ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load a[0] - load b[0]
1920ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load b[1] + load a[1]
1921ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load a[2] - load b[2]
1922ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load a[3] + load b[3]
1923ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Reordering the second load b[1]  load a[1] would allow us to vectorize this
1924ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// code.
1925ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesvoid BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
1926ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                        SmallVectorImpl<Value *> &Left,
1927ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                        SmallVectorImpl<Value *> &Right) {
1928ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // Push left and right operands of binary operation into Left and Right
1929de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (Value *i : VL) {
1930de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Left.push_back(cast<Instruction>(i)->getOperand(0));
1931de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Right.push_back(cast<Instruction>(i)->getOperand(1));
1932ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
1933ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1934ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // Reorder if we have a commutative operation and consecutive access
1935ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // are on either side of the alternate instructions.
1936ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  for (unsigned j = 0; j < VL.size() - 1; ++j) {
1937ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
1938ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
1939ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Instruction *VL1 = cast<Instruction>(VL[j]);
1940ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
1941de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
1942ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j], Right[j]);
1943ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
1944de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        } else if (VL2->isCommutative() &&
1945de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                   isConsecutiveAccess(L, L1, *DL, *SE)) {
1946ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j + 1], Right[j + 1]);
1947ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
1948ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        }
1949ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        // else unchanged
1950ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      }
1951ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
1952ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
1953ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
1954ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Instruction *VL1 = cast<Instruction>(VL[j]);
1955ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
1956de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
1957ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j], Right[j]);
1958ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
1959de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        } else if (VL2->isCommutative() &&
1960de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                   isConsecutiveAccess(L, L1, *DL, *SE)) {
1961ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j + 1], Right[j + 1]);
1962ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
1963ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        }
1964ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        // else unchanged
1965ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      }
1966ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
1967ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
1968ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
1969ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1970f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// Return true if I should be commuted before adding it's left and right
1971f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// operands to the arrays Left and Right.
1972f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar//
1973f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// The vectorizer is trying to either have all elements one side being
1974f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// instruction with the same opcode to enable further vectorization, or having
1975f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// a splat to lower the vectorizing cost.
1976f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic bool shouldReorderOperands(int i, Instruction &I,
1977f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                                  SmallVectorImpl<Value *> &Left,
1978f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                                  SmallVectorImpl<Value *> &Right,
1979f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                                  bool AllSameOpcodeLeft,
1980f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                                  bool AllSameOpcodeRight, bool SplatLeft,
1981f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                                  bool SplatRight) {
1982f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  Value *VLeft = I.getOperand(0);
1983f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  Value *VRight = I.getOperand(1);
1984f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // If we have "SplatRight", try to see if commuting is needed to preserve it.
1985f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (SplatRight) {
1986f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (VRight == Right[i - 1])
1987f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // Preserve SplatRight
1988f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      return false;
1989f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (VLeft == Right[i - 1]) {
1990f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // Commuting would preserve SplatRight, but we don't want to break
1991f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // SplatLeft either, i.e. preserve the original order if possible.
1992f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // (FIXME: why do we care?)
1993f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      if (SplatLeft && VLeft == Left[i - 1])
1994f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        return false;
1995f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      return true;
1996f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    }
1997f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  }
1998f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // Symmetrically handle Right side.
1999f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (SplatLeft) {
2000f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (VLeft == Left[i - 1])
2001f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // Preserve SplatLeft
2002f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      return false;
2003f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (VRight == Left[i - 1])
2004f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      return true;
2005f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  }
2006f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
2007f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  Instruction *ILeft = dyn_cast<Instruction>(VLeft);
2008f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  Instruction *IRight = dyn_cast<Instruction>(VRight);
2009f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
2010f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // If we have "AllSameOpcodeRight", try to see if the left operands preserves
2011f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // it and not the right, in this case we want to commute.
2012f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (AllSameOpcodeRight) {
2013f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    unsigned RightPrevOpcode = cast<Instruction>(Right[i - 1])->getOpcode();
2014f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (IRight && RightPrevOpcode == IRight->getOpcode())
2015f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // Do not commute, a match on the right preserves AllSameOpcodeRight
2016f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      return false;
2017f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (ILeft && RightPrevOpcode == ILeft->getOpcode()) {
2018f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // We have a match and may want to commute, but first check if there is
2019f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // not also a match on the existing operands on the Left to preserve
2020f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // AllSameOpcodeLeft, i.e. preserve the original order if possible.
2021f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // (FIXME: why do we care?)
2022f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      if (AllSameOpcodeLeft && ILeft &&
2023f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar          cast<Instruction>(Left[i - 1])->getOpcode() == ILeft->getOpcode())
2024f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        return false;
2025f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      return true;
2026f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    }
2027f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  }
2028f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // Symmetrically handle Left side.
2029f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (AllSameOpcodeLeft) {
2030f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    unsigned LeftPrevOpcode = cast<Instruction>(Left[i - 1])->getOpcode();
2031f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (ILeft && LeftPrevOpcode == ILeft->getOpcode())
2032f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      return false;
2033f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (IRight && LeftPrevOpcode == IRight->getOpcode())
2034f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      return true;
2035f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  }
2036f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  return false;
2037f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar}
2038f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
2039ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesvoid BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
2040ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                             SmallVectorImpl<Value *> &Left,
2041ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                             SmallVectorImpl<Value *> &Right) {
2042ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
2043f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (VL.size()) {
2044f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // Peel the first iteration out of the loop since there's nothing
2045f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // interesting to do anyway and it simplifies the checks in the loop.
2046f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    auto VLeft = cast<Instruction>(VL[0])->getOperand(0);
2047f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    auto VRight = cast<Instruction>(VL[0])->getOperand(1);
2048f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
2049f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // Favor having instruction to the right. FIXME: why?
2050f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      std::swap(VLeft, VRight);
2051ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Left.push_back(VLeft);
2052ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Right.push_back(VRight);
2053ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
2054ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
2055f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // Keep track if we have instructions with all the same opcode on one side.
2056f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  bool AllSameOpcodeLeft = isa<Instruction>(Left[0]);
2057f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  bool AllSameOpcodeRight = isa<Instruction>(Right[0]);
2058f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // Keep track if we have one side with all the same value (broadcast).
2059f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  bool SplatLeft = true;
2060f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  bool SplatRight = true;
2061ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
2062f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  for (unsigned i = 1, e = VL.size(); i != e; ++i) {
2063f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    Instruction *I = cast<Instruction>(VL[i]);
2064f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    assert(I->isCommutative() && "Can only process commutative instruction");
2065f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // Commute to favor either a splat or maximizing having the same opcodes on
2066f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // one side.
2067f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (shouldReorderOperands(i, *I, Left, Right, AllSameOpcodeLeft,
2068f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                              AllSameOpcodeRight, SplatLeft, SplatRight)) {
2069f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      Left.push_back(I->getOperand(1));
2070f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      Right.push_back(I->getOperand(0));
2071f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    } else {
2072f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      Left.push_back(I->getOperand(0));
2073f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      Right.push_back(I->getOperand(1));
2074f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    }
2075f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // Update Splat* and AllSameOpcode* after the insertion.
2076f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    SplatRight = SplatRight && (Right[i - 1] == Right[i]);
2077f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    SplatLeft = SplatLeft && (Left[i - 1] == Left[i]);
2078f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    AllSameOpcodeLeft = AllSameOpcodeLeft && isa<Instruction>(Left[i]) &&
2079f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                        (cast<Instruction>(Left[i - 1])->getOpcode() ==
2080f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                         cast<Instruction>(Left[i])->getOpcode());
2081f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    AllSameOpcodeRight = AllSameOpcodeRight && isa<Instruction>(Right[i]) &&
2082f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                         (cast<Instruction>(Right[i - 1])->getOpcode() ==
2083f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                          cast<Instruction>(Right[i])->getOpcode());
2084ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
2085ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
2086f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // If one operand end up being broadcast, return this operand order.
2087f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (SplatRight || SplatLeft)
2088f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return;
2089f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
2090ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // Finally check if we can get longer vectorizable chain by reordering
2091ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // without breaking the good operand order detected above.
2092ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // E.g. If we have something like-
2093ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // load a[0]  load b[0]
2094ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // load b[1]  load a[1]
2095ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // load a[2]  load b[2]
2096ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // load a[3]  load b[3]
2097ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // Reordering the second load b[1]  load a[1] would allow us to vectorize
2098ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // this code and we still retain AllSameOpcode property.
2099ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // FIXME: This load reordering might break AllSameOpcode in some rare cases
2100ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // such as-
2101ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // add a[0],c[0]  load b[0]
2102ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // add a[1],c[2]  load b[1]
2103ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // b[2]           load b[2]
2104ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // add a[3],c[3]  load b[3]
2105ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  for (unsigned j = 0; j < VL.size() - 1; ++j) {
2106ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
2107ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
2108de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (isConsecutiveAccess(L, L1, *DL, *SE)) {
2109ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j + 1], Right[j + 1]);
2110ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
2111ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        }
2112ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      }
2113ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
2114ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
2115ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
2116de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (isConsecutiveAccess(L, L1, *DL, *SE)) {
2117ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          std::swap(Left[j + 1], Right[j + 1]);
2118ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          continue;
2119ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines        }
2120ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      }
2121ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    }
2122ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    // else unchanged
2123ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
2124ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
2125ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
21264b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenaultvoid BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
21274b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  Instruction *VL0 = cast<Instruction>(VL[0]);
2128f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  BasicBlock::iterator NextInst(VL0);
21294b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  ++NextInst;
21304b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  Builder.SetInsertPoint(VL0->getParent(), NextInst);
21314b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault  Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
21324b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault}
21334b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault
2134369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
213553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Value *Vec = UndefValue::get(Ty);
213653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  // Generate the 'InsertElement' instruction.
213753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
213853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
2139a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
2140a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      GatherSeq.insert(Insrt);
2141a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling      CSEBlocks.insert(Insrt->getParent());
2142a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2143a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      // Add to our 'need-to-extract' list.
2144a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      if (ScalarToTreeEntry.count(VL[i])) {
2145a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        int Idx = ScalarToTreeEntry[VL[i]];
2146a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        TreeEntry *E = &VectorizableTree[Idx];
2147a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        // Find which lane we need to extract.
2148a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        int FoundLane = -1;
2149a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        for (unsigned Lane = 0, LE = VL.size(); Lane != LE; ++Lane) {
2150a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem          // Is this the lane of the scalar that we are looking for ?
2151a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem          if (E->Scalars[Lane] == VL[i]) {
2152a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem            FoundLane = Lane;
2153a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem            break;
2154a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem          }
2155a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        }
2156a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        assert(FoundLane >= 0 && "Could not find the correct lane");
2157a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem        ExternalUses.push_back(ExternalUser(VL[i], Insrt, FoundLane));
2158a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      }
2159a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    }
216053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
216153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
216253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  return Vec;
216353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
216453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
21656a804acc4ae77c014e4ef97c37f8e720ef360394Matt ArsenaultValue *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
21666a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault  SmallDenseMap<Value*, int>::const_iterator Entry
21676a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault    = ScalarToTreeEntry.find(VL[0]);
21686a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault  if (Entry != ScalarToTreeEntry.end()) {
21696a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault    int Idx = Entry->second;
21706a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault    const TreeEntry *En = &VectorizableTree[Idx];
217162657090de3a5731bf644437701ccd78c247119fNadav Rotem    if (En->isSame(VL) && En->VectorizedValue)
217262657090de3a5731bf644437701ccd78c247119fNadav Rotem      return En->VectorizedValue;
217362657090de3a5731bf644437701ccd78c247119fNadav Rotem  }
2174dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return nullptr;
217562657090de3a5731bf644437701ccd78c247119fNadav Rotem}
217662657090de3a5731bf644437701ccd78c247119fNadav Rotem
2177369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
2178369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (ScalarToTreeEntry.count(VL[0])) {
2179369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    int Idx = ScalarToTreeEntry[VL[0]];
2180369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    TreeEntry *E = &VectorizableTree[Idx];
2181369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (E->isSame(VL))
2182369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return vectorizeTree(E);
2183369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
218453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
218553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  Type *ScalarTy = VL[0]->getType();
218653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
218753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    ScalarTy = SI->getValueOperand()->getType();
218853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
218953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return Gather(VL, VecTy);
2191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
2192369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
2193369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::vectorizeTree(TreeEntry *E) {
2194adb412daa41aef94a9f724dfd1ade9f579bb3a84Benjamin Kramer  IRBuilder<>::InsertPointGuard Guard(Builder);
219553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2196369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (E->VectorizedValue) {
2197369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
2198369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return E->VectorizedValue;
219953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
220053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
22011b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  Instruction *VL0 = cast<Instruction>(E->Scalars[0]);
22021b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  Type *ScalarTy = VL0->getType();
22031b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
2204369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ScalarTy = SI->getValueOperand()->getType();
2205369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
220653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2207369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (E->NeedToGather) {
22084b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault    setInsertPointAfterBundle(E->Scalars);
2209369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return Gather(E->Scalars, VecTy);
2210369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
221137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
2212c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  unsigned Opcode = getSameOpcode(E->Scalars);
2213805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
2214369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  switch (Opcode) {
2215369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PHI: {
2216369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      PHINode *PH = dyn_cast<PHINode>(VL0);
2217d237e834a816399b7e1561dd4db2c501f5095712Justin Bogner      Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
221879c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem      Builder.SetCurrentDebugLocation(PH->getDebugLoc());
2219369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
2220369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = NewPhi;
2221369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
2222353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem      // PHINodes may have multiple entries from the same block. We want to
2223353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem      // visit every block once.
2224353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem      SmallSet<BasicBlock*, 4> VisitedBBs;
2225353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem
2226369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
2227369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        ValueList Operands;
2228369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        BasicBlock *IBB = PH->getIncomingBlock(i);
2229369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
223037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (!VisitedBBs.insert(IBB).second) {
2231353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem          NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
2232353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem          continue;
2233353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem        }
2234353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem
2235369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        // Prepare the operand vector.
2236f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        for (Value *V : E->Scalars)
2237f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar          Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(IBB));
2238369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
2239369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Builder.SetInsertPoint(IBB->getTerminator());
224079c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem        Builder.SetCurrentDebugLocation(PH->getDebugLoc());
2241369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Value *Vec = vectorizeTree(Operands);
2242369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        NewPhi->addIncoming(Vec, IBB);
2243369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
2244805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
2245369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() &&
2246369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem             "Invalid number of incoming values");
2247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return NewPhi;
2248805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem    }
2249805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem
2250369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ExtractElement: {
2251de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (canReuseExtract(E->Scalars, Instruction::ExtractElement)) {
2252369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Value *V = VL0->getOperand(0);
2253369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        E->VectorizedValue = V;
2254369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        return V;
2255369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
2256369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return Gather(E->Scalars, VecTy);
225753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
2258de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    case Instruction::ExtractValue: {
2259de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (canReuseExtract(E->Scalars, Instruction::ExtractValue)) {
2260de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        LoadInst *LI = cast<LoadInst>(VL0->getOperand(0));
2261de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        Builder.SetInsertPoint(LI);
2262de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
2263de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
2264de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        LoadInst *V = Builder.CreateAlignedLoad(Ptr, LI->getAlignment());
2265de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        E->VectorizedValue = V;
2266de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        return propagateMetadata(V, E->Scalars);
2267de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      }
2268de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      return Gather(E->Scalars, VecTy);
2269de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    }
2270369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ZExt:
2271369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SExt:
2272369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToUI:
2273369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPToSI:
2274369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPExt:
2275369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::PtrToInt:
2276369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::IntToPtr:
2277369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SIToFP:
2278369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UIToFP:
2279369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Trunc:
2280369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FPTrunc:
2281369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::BitCast: {
2282369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      ValueList INVL;
2283f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      for (Value *V : E->Scalars)
2284f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        INVL.push_back(cast<Instruction>(V)->getOperand(0));
2285369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
22864b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
228779c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2288369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *InVec = vectorizeTree(INVL);
228962657090de3a5731bf644437701ccd78c247119fNadav Rotem
229062657090de3a5731bf644437701ccd78c247119fNadav Rotem      if (Value *V = alreadyVectorized(E->Scalars))
229162657090de3a5731bf644437701ccd78c247119fNadav Rotem        return V;
229262657090de3a5731bf644437701ccd78c247119fNadav Rotem
2293369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      CastInst *CI = dyn_cast<CastInst>(VL0);
2294369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
2295369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = V;
229637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2297369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return V;
229853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
2299369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FCmp:
2300369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::ICmp: {
2301369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      ValueList LHSV, RHSV;
2302f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      for (Value *V : E->Scalars) {
2303f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        LHSV.push_back(cast<Instruction>(V)->getOperand(0));
2304f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        RHSV.push_back(cast<Instruction>(V)->getOperand(1));
2305369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
230653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
23074b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
230879c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2309369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *L = vectorizeTree(LHSV);
2310369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *R = vectorizeTree(RHSV);
231162657090de3a5731bf644437701ccd78c247119fNadav Rotem
231262657090de3a5731bf644437701ccd78c247119fNadav Rotem      if (Value *V = alreadyVectorized(E->Scalars))
231362657090de3a5731bf644437701ccd78c247119fNadav Rotem        return V;
231453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
23150c7f116bb6950ef819323d855415b2f2b0aad987Pirama Arumuga Nainar      CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
231662657090de3a5731bf644437701ccd78c247119fNadav Rotem      Value *V;
2317369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (Opcode == Instruction::FCmp)
2318369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        V = Builder.CreateFCmp(P0, L, R);
2319369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      else
2320369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        V = Builder.CreateICmp(P0, L, R);
232153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2322369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = V;
232337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2324369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return V;
232553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
2326369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Select: {
2327369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      ValueList TrueVec, FalseVec, CondVec;
2328f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      for (Value *V : E->Scalars) {
2329f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        CondVec.push_back(cast<Instruction>(V)->getOperand(0));
2330f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        TrueVec.push_back(cast<Instruction>(V)->getOperand(1));
2331f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        FalseVec.push_back(cast<Instruction>(V)->getOperand(2));
2332369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
233353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
23344b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
233579c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2336369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *Cond = vectorizeTree(CondVec);
2337369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *True = vectorizeTree(TrueVec);
2338369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *False = vectorizeTree(FalseVec);
233962657090de3a5731bf644437701ccd78c247119fNadav Rotem
234062657090de3a5731bf644437701ccd78c247119fNadav Rotem      if (Value *V = alreadyVectorized(E->Scalars))
234162657090de3a5731bf644437701ccd78c247119fNadav Rotem        return V;
234257aa3aad33b50583d5a82735777d0f0dc03ff122Matt Arsenault
2343369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *V = Builder.CreateSelect(Cond, True, False);
2344369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = V;
234537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2346369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return V;
234753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    }
2348369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Add:
2349369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FAdd:
2350369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Sub:
2351369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FSub:
2352369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Mul:
2353369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FMul:
2354369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::UDiv:
2355369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SDiv:
2356369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FDiv:
2357369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::URem:
2358369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::SRem:
2359369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::FRem:
2360369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Shl:
2361369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::LShr:
2362369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::AShr:
2363369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::And:
2364369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Or:
2365369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Xor: {
2366369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      ValueList LHSVL, RHSVL;
2367af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
2368af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer        reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
2369af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer      else
2370f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        for (Value *V : E->Scalars) {
2371f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar          LHSVL.push_back(cast<Instruction>(V)->getOperand(0));
2372f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar          RHSVL.push_back(cast<Instruction>(V)->getOperand(1));
2373af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer        }
237453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
23754b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
237679c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2377369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *LHS = vectorizeTree(LHSVL);
2378369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *RHS = vectorizeTree(RHSVL);
237953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2380369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (LHS == RHS && isa<Instruction>(LHS)) {
2381369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        assert((VL0->getOperand(0) == VL0->getOperand(1)) && "Invalid order");
2382369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
238353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
238462657090de3a5731bf644437701ccd78c247119fNadav Rotem      if (Value *V = alreadyVectorized(E->Scalars))
238562657090de3a5731bf644437701ccd78c247119fNadav Rotem        return V;
238662657090de3a5731bf644437701ccd78c247119fNadav Rotem
2387369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
2388369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
2389369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = V;
239037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      propagateIRFlags(E->VectorizedValue, E->Scalars);
239137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2392fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling
2393fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      if (Instruction *I = dyn_cast<Instruction>(V))
2394fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling        return propagateMetadata(I, E->Scalars);
2395fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling
2396369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      return V;
2397369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
2398369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Load: {
2399369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Loads are inserted at the head of the tree because we don't want to
2400369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // sink them all the way down past store instructions.
24014b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
240279c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2403369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      LoadInst *LI = cast<LoadInst>(VL0);
240437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Type *ScalarLoadTy = LI->getType();
24059e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault      unsigned AS = LI->getPointerAddressSpace();
24069e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault
24079e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault      Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
24089e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault                                            VecTy->getPointerTo(AS));
240937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
241037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // The pointer operand uses an in-tree scalar so we add the new BitCast to
241137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // ExternalUses list to make sure that an extract will be generated in the
241237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // future.
241337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (ScalarToTreeEntry.count(LI->getPointerOperand()))
241437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ExternalUses.push_back(
241537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ExternalUser(LI->getPointerOperand(), cast<User>(VecPtr), 0));
241637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
2417369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      unsigned Alignment = LI->getAlignment();
2418369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      LI = Builder.CreateLoad(VecPtr);
24194c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      if (!Alignment) {
2420de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        Alignment = DL->getABITypeAlignment(ScalarLoadTy);
24214c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      }
2422369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      LI->setAlignment(Alignment);
2423369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = LI;
242437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2425fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      return propagateMetadata(LI, E->Scalars);
2426369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
2427369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    case Instruction::Store: {
2428369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      StoreInst *SI = cast<StoreInst>(VL0);
2429369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      unsigned Alignment = SI->getAlignment();
24309e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault      unsigned AS = SI->getPointerAddressSpace();
2431369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
2432369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      ValueList ValueOp;
2433f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      for (Value *V : E->Scalars)
2434f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        ValueOp.push_back(cast<StoreInst>(V)->getValueOperand());
2435369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
24364b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault      setInsertPointAfterBundle(E->Scalars);
243779c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem
2438369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *VecValue = vectorizeTree(ValueOp);
24399e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault      Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
24409e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault                                            VecTy->getPointerTo(AS));
2441369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
244237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
244337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // The pointer operand uses an in-tree scalar so we add the new BitCast to
244437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // ExternalUses list to make sure that an extract will be generated in the
244537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // future.
244637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (ScalarToTreeEntry.count(SI->getPointerOperand()))
244737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ExternalUses.push_back(
244837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0));
244937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
24504c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      if (!Alignment) {
2451de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
24524c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      }
2453369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      S->setAlignment(Alignment);
2454369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      E->VectorizedValue = S;
245537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2456fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling      return propagateMetadata(S, E->Scalars);
2457369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
2458c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::GetElementPtr: {
2459c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      setInsertPointAfterBundle(E->Scalars);
2460c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2461c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      ValueList Op0VL;
2462f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      for (Value *V : E->Scalars)
2463f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        Op0VL.push_back(cast<GetElementPtrInst>(V)->getOperand(0));
2464c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2465c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *Op0 = vectorizeTree(Op0VL);
2466c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2467c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      std::vector<Value *> OpVecs;
2468c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
2469c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines           ++j) {
2470c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        ValueList OpVL;
2471f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        for (Value *V : E->Scalars)
2472f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar          OpVL.push_back(cast<GetElementPtrInst>(V)->getOperand(j));
2473c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2474c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        Value *OpVec = vectorizeTree(OpVL);
2475c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        OpVecs.push_back(OpVec);
2476c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
2477c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
24784c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      Value *V = Builder.CreateGEP(
24794c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar          cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs);
2480c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      E->VectorizedValue = V;
248137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2482c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2483c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (Instruction *I = dyn_cast<Instruction>(V))
2484c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        return propagateMetadata(I, E->Scalars);
2485c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2486c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return V;
2487c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
248836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    case Instruction::Call: {
248936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      CallInst *CI = cast<CallInst>(VL0);
249036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      setInsertPointAfterBundle(E->Scalars);
2491c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Function *FI;
2492c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Intrinsic::ID IID  = Intrinsic::not_intrinsic;
249337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Value *ScalarArg = nullptr;
2494c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (CI && (FI = CI->getCalledFunction())) {
24956948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar        IID = FI->getIntrinsicID();
2496c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
249736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      std::vector<Value *> OpVecs;
249836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
249936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ValueList OpVL;
2500c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // ctlz,cttz and powi are special intrinsics whose second argument is
2501c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        // a scalar. This argument should not be vectorized.
2502c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
2503c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          CallInst *CEI = cast<CallInst>(E->Scalars[0]);
250437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          ScalarArg = CEI->getArgOperand(j);
2505c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          OpVecs.push_back(CEI->getArgOperand(j));
2506c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          continue;
2507c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        }
2508f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        for (Value *V : E->Scalars) {
2509f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar          CallInst *CEI = cast<CallInst>(V);
251036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          OpVL.push_back(CEI->getArgOperand(j));
251136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
251236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
251336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Value *OpVec = vectorizeTree(OpVL);
251436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
251536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        OpVecs.push_back(OpVec);
251636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
251736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
251836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Module *M = F->getParent();
2519de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
252036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
252136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
2522de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      SmallVector<OperandBundleDef, 1> OpBundles;
2523de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      CI->getOperandBundlesAsDefs(OpBundles);
2524de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      Value *V = Builder.CreateCall(CF, OpVecs, OpBundles);
252537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
252637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // The scalar argument uses an in-tree scalar so we add the new vectorized
252737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // call to ExternalUses list to make sure that an extract will be
252837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // generated in the future.
252937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (ScalarArg && ScalarToTreeEntry.count(ScalarArg))
253037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
253137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
253236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      E->VectorizedValue = V;
253337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
253436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return V;
253536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
2536c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    case Instruction::ShuffleVector: {
2537c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      ValueList LHSVL, RHSVL;
2538ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      assert(isa<BinaryOperator>(VL0) && "Invalid Shuffle Vector Operand");
2539ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      reorderAltShuffleOperands(E->Scalars, LHSVL, RHSVL);
2540c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      setInsertPointAfterBundle(E->Scalars);
2541c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2542c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *LHS = vectorizeTree(LHSVL);
2543c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *RHS = vectorizeTree(RHSVL);
2544c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2545c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (Value *V = alreadyVectorized(E->Scalars))
2546c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        return V;
2547c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2548c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // Create a vector of LHS op1 RHS
2549c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      BinaryOperator *BinOp0 = cast<BinaryOperator>(VL0);
2550c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS);
2551c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2552c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      // Create a vector of LHS op2 RHS
2553c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Instruction *VL1 = cast<Instruction>(E->Scalars[1]);
2554c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      BinaryOperator *BinOp1 = cast<BinaryOperator>(VL1);
2555c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS);
2556c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
255737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Create shuffle to take alternate operations from the vector.
255837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Also, gather up odd and even scalar ops to propagate IR flags to
255937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // each vector operation.
256037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ValueList OddScalars, EvenScalars;
2561c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      unsigned e = E->Scalars.size();
256237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SmallVector<Constant *, 8> Mask(e);
2563c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      for (unsigned i = 0; i < e; ++i) {
256437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (i & 1) {
2565c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          Mask[i] = Builder.getInt32(e + i);
256637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          OddScalars.push_back(E->Scalars[i]);
256737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        } else {
2568c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          Mask[i] = Builder.getInt32(i);
256937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          EvenScalars.push_back(E->Scalars[i]);
257037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
2571c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      }
2572c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2573c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *ShuffleMask = ConstantVector::get(Mask);
257437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      propagateIRFlags(V0, EvenScalars);
257537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      propagateIRFlags(V1, OddScalars);
2576c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2577c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2578c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      E->VectorizedValue = V;
257937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ++NumVectorInstructions;
2580c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      if (Instruction *I = dyn_cast<Instruction>(V))
2581c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        return propagateMetadata(I, E->Scalars);
2582c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
2583c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      return V;
2584c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    }
2585369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    default:
2586369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    llvm_unreachable("unknown inst");
258753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
2588dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return nullptr;
2589369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
259053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2591a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold SchwaighoferValue *BoUpSLP::vectorizeTree() {
2592de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
259337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // All blocks must be scheduled before any instructions are inserted.
259437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (auto &BSIter : BlocksSchedules) {
259537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    scheduleBlock(BSIter.second.get());
259637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
259737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
2598f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  Builder.SetInsertPoint(&F->getEntryBlock().front());
2599de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *VectorRoot = vectorizeTree(&VectorizableTree[0]);
2600de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
2601de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If the vectorized tree can be rewritten in a smaller type, we truncate the
2602de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // vectorized root. InstCombine will then rewrite the entire expression. We
2603de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // sign extend the extracted values below.
2604de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *ScalarRoot = VectorizableTree[0].Scalars[0];
2605de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (MinBWs.count(ScalarRoot)) {
2606de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (auto *I = dyn_cast<Instruction>(VectorRoot))
2607de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
2608de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto BundleWidth = VectorizableTree[0].Scalars.size();
2609de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]);
2610de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *VecTy = VectorType::get(MinTy, BundleWidth);
2611de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
2612de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    VectorizableTree[0].VectorizedValue = Trunc;
2613de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
261453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2615a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
2616a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2617a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  // Extract all of the elements with the external uses.
2618de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (const auto &ExternalUse : ExternalUses) {
2619de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Value *Scalar = ExternalUse.Scalar;
2620de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    llvm::User *User = ExternalUse.User;
2621523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem
2622523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem    // Skip users that we already RAUW. This happens when one instruction
2623523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem    // has multiple uses of the same value.
262436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (std::find(Scalar->user_begin(), Scalar->user_end(), User) ==
262536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Scalar->user_end())
2626a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem      continue;
2627a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar");
2628a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2629a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    int Idx = ScalarToTreeEntry[Scalar];
2630a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    TreeEntry *E = &VectorizableTree[Idx];
2631a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    assert(!E->NeedToGather && "Extracting from a gather list");
2632a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2633a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    Value *Vec = E->VectorizedValue;
2634a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    assert(Vec && "Can't find vectorizable value");
2635a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2636de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Value *Lane = Builder.getInt32(ExternalUse.Lane);
2637a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // Generate extracts for out-of-tree users.
2638a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    // Find the insertion point for the extractelement lane.
2639de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (auto *VecI = dyn_cast<Instruction>(Vec)) {
2640523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem      if (PHINode *PH = dyn_cast<PHINode>(User)) {
2641523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem        for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
2642523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem          if (PH->getIncomingValue(i) == Scalar) {
2643de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            TerminatorInst *IncomingTerminator =
2644de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                PH->getIncomingBlock(i)->getTerminator();
2645de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            if (isa<CatchSwitchInst>(IncomingTerminator)) {
2646de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar              Builder.SetInsertPoint(VecI->getParent(),
2647de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                     std::next(VecI->getIterator()));
2648de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            } else {
2649de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar              Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
2650de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            }
2651f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem            Value *Ex = Builder.CreateExtractElement(Vec, Lane);
2652de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            if (MinBWs.count(ScalarRoot))
2653de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar              Ex = Builder.CreateSExt(Ex, Scalar->getType());
2654a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling            CSEBlocks.insert(PH->getIncomingBlock(i));
2655f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem            PH->setOperand(i, Ex);
2656523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem          }
2657523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem        }
2658523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem      } else {
2659f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem        Builder.SetInsertPoint(cast<Instruction>(User));
2660f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem        Value *Ex = Builder.CreateExtractElement(Vec, Lane);
2661de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (MinBWs.count(ScalarRoot))
2662de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Ex = Builder.CreateSExt(Ex, Scalar->getType());
2663a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling        CSEBlocks.insert(cast<Instruction>(User)->getParent());
2664f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem        User->replaceUsesOfWith(Scalar, Ex);
2665523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem     }
2666a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    } else {
2667f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      Builder.SetInsertPoint(&F->getEntryBlock().front());
2668f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem      Value *Ex = Builder.CreateExtractElement(Vec, Lane);
2669de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (MinBWs.count(ScalarRoot))
2670de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        Ex = Builder.CreateSExt(Ex, Scalar->getType());
2671a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling      CSEBlocks.insert(&F->getEntryBlock());
2672f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem      User->replaceUsesOfWith(Scalar, Ex);
2673a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    }
2674a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2675a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem    DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
2676a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem  }
2677a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem
2678369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // For each vectorized value:
2679de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (TreeEntry &EIdx : VectorizableTree) {
2680de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    TreeEntry *Entry = &EIdx;
268153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2682369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    // For each lane:
2683369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
2684369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Value *Scalar = Entry->Scalars[Lane];
2685369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // No need to handle users of gathered values.
2686369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (Entry->NeedToGather)
2687369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        continue;
268853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2689ace9ed50b549667eff8e19eb76f7714a3a6161aeNadav Rotem      assert(Entry->VectorizedValue && "Can't find vectorizable value");
2690ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem
2691369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Type *Ty = Scalar->getType();
2692369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (!Ty->isVoidTy()) {
269336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#ifndef NDEBUG
269436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        for (User *U : Scalar->users()) {
269536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
2696a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
269736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          assert((ScalarToTreeEntry.count(U) ||
2698dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                  // It is legal to replace users in the ignorelist by undef.
2699dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                  (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) !=
2700dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                   UserIgnoreList.end())) &&
2701369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem                 "Replacing out-of-tree value with undef");
2702369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        }
270336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#endif
2704369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Value *Undef = UndefValue::get(Ty);
2705369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Scalar->replaceAllUsesWith(Undef);
2706369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
2707369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
2708ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      eraseInstruction(cast<Instruction>(Scalar));
2709ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem    }
2710ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem  }
2711ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem
2712c7ffbc019fdc6ae5265f1841eaabae34e301f59bNadav Rotem  Builder.ClearInsertionPoint();
2713a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
2714a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  return VectorizableTree[0].VectorizedValue;
271553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
271653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
2717369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemvoid BoUpSLP::optimizeGatherSequence() {
2718369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
2719369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        << " gather sequences instructions.\n");
27206959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem  // LICM InsertElementInst sequences.
2721de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (Instruction *it : GatherSeq) {
2722de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    InsertElementInst *Insert = dyn_cast<InsertElementInst>(it);
272353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
272453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    if (!Insert)
272553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      continue;
272653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
272753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // Check if this block is inside a loop.
27286959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem    Loop *L = LI->getLoopFor(Insert->getParent());
272953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    if (!L)
27306959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem      continue;
273153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
273253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // Check if it has a preheader.
273353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    BasicBlock *PreHeader = L->getLoopPreheader();
273453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    if (!PreHeader)
273529acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem      continue;
273653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
273753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // If the vector or the element that we insert into it are
273853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // instructions that are defined in this basic block then we can't
273953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // hoist this instruction.
274053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
274153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
274253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    if (CurrVec && L->contains(CurrVec))
274353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      continue;
274453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    if (NewElem && L->contains(NewElem))
274553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem      continue;
274653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
274753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem    // We can hoist this instruction. Move it to the pre-header.
27486959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem    Insert->moveBefore(PreHeader->getTerminator());
27496959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem  }
27506959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem
2751dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Make a list of all reachable blocks in our CSE queue.
2752dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  SmallVector<const DomTreeNode *, 8> CSEWorkList;
2753dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  CSEWorkList.reserve(CSEBlocks.size());
2754dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  for (BasicBlock *BB : CSEBlocks)
2755dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (DomTreeNode *N = DT->getNode(BB)) {
2756dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      assert(DT->isReachableFromEntry(N));
2757dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      CSEWorkList.push_back(N);
2758dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    }
2759dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
27600c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer  // Sort blocks by domination. This ensures we visit a block after all blocks
27610c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer  // dominating it are visited.
276236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(),
2763dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                   [this](const DomTreeNode *A, const DomTreeNode *B) {
276436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return DT->properlyDominates(A, B);
276536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  });
27660c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer
27676959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem  // Perform O(N^2) search over the gather sequences and merge identical
27686959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem  // instructions. TODO: We can further optimize this scan if we split the
27696959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem  // instructions into different buckets based on the insert lane.
27700c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer  SmallVector<Instruction *, 16> Visited;
2771dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) {
277236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
27730c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer           "Worklist not sorted properly!");
2774dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    BasicBlock *BB = (*I)->getBlock();
27750c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer    // For all instructions in blocks containing gather sequences:
27760c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
2777f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      Instruction *In = &*it++;
2778a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling      if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
27796959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem        continue;
27806959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem
278129acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem      // Check if we can replace this instruction with any of the
278229acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem      // visited instructions.
2783de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      for (Instruction *v : Visited) {
2784de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (In->isIdenticalTo(v) &&
2785de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            DT->dominates(v->getParent(), In->getParent())) {
2786de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          In->replaceAllUsesWith(v);
2787ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          eraseInstruction(In);
2788dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          In = nullptr;
27896959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem          break;
27906959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem        }
27916959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem      }
27920c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer      if (In) {
27930c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer        assert(std::find(Visited.begin(), Visited.end(), In) == Visited.end());
27940c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer        Visited.push_back(In);
27950c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer      }
27966959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem    }
279753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem  }
2798a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling  CSEBlocks.clear();
2799a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling  GatherSeq.clear();
280053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}
280153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
280237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// Groups the instructions to a bundle (which is then a single scheduling entity)
280337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// and schedules instructions until the bundle gets ready.
280437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
2805ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                                 BoUpSLP *SLP) {
280637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (isa<PHINode>(VL[0]))
280737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return true;
280837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
280937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Initialize the instruction bundle.
281037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  Instruction *OldScheduleEnd = ScheduleEnd;
281137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ScheduleData *PrevInBundle = nullptr;
281237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ScheduleData *Bundle = nullptr;
281337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  bool ReSchedule = false;
281437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(dbgs() << "SLP:  bundle: " << *VL[0] << "\n");
2815f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
2816f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // Make sure that the scheduling region contains all
2817f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // instructions of the bundle.
2818f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  for (Value *V : VL) {
2819f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (!extendSchedulingRegion(V))
2820f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      return false;
2821f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  }
2822f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
282337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (Value *V : VL) {
282437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *BundleMember = getScheduleData(V);
282537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(BundleMember &&
282637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines           "no ScheduleData for bundle member (maybe not in same basic block)");
282737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (BundleMember->IsScheduled) {
282837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // A bundle member was scheduled as single instruction before and now
282937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // needs to be scheduled as part of the bundle. We just get rid of the
283037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // existing schedule.
283137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DEBUG(dbgs() << "SLP:  reset schedule because " << *BundleMember
283237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                   << " was already scheduled\n");
283337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ReSchedule = true;
283437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
283537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(BundleMember->isSchedulingEntity() &&
283637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines           "bundle member already part of other bundle");
283737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (PrevInBundle) {
283837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      PrevInBundle->NextInBundle = BundleMember;
283937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    } else {
284037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Bundle = BundleMember;
284137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
284237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember->UnscheduledDepsInBundle = 0;
284337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;
284437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
284537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Group the instructions to a bundle.
284637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember->FirstInBundle = Bundle;
284737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    PrevInBundle = BundleMember;
284837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
284937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (ScheduleEnd != OldScheduleEnd) {
285037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // The scheduling region got new instructions at the lower end (or it is a
285137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // new region for the first bundle). This makes it necessary to
285237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // recalculate all dependencies.
285337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // It is seldom that this needs to be done a second time after adding the
285437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // initial bundle to the region.
285537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
285637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleData *SD = getScheduleData(I);
285737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SD->clearDependencies();
285837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
285937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ReSchedule = true;
286037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
286137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (ReSchedule) {
286237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    resetSchedule();
286337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    initialFillReadyList(ReadyInsts);
286437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
286537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
286637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block "
286737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines               << BB->getName() << "\n");
286837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
2869ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  calculateDependencies(Bundle, true, SLP);
287037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
287137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Now try to schedule the new bundle. As soon as the bundle is "ready" it
287237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // means that there are no cyclic dependencies and we can schedule it.
287337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Note that's important that we don't "schedule" the bundle yet (see
287437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // cancelScheduling).
287537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  while (!Bundle->isReady() && !ReadyInsts.empty()) {
287637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
287737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *pickedSD = ReadyInsts.back();
287837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ReadyInsts.pop_back();
287937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
288037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (pickedSD->isSchedulingEntity() && pickedSD->isReady()) {
288137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      schedule(pickedSD, ReadyInsts);
288237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
288337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
2884f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (!Bundle->isReady()) {
2885f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    cancelScheduling(VL);
2886f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return false;
2887f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  }
2888f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  return true;
288937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
289037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
289137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
289237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (isa<PHINode>(VL[0]))
289337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return;
289437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
289537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ScheduleData *Bundle = getScheduleData(VL[0]);
289637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(dbgs() << "SLP:  cancel scheduling of " << *Bundle << "\n");
289737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(!Bundle->IsScheduled &&
289837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines         "Can't cancel bundle which is already scheduled");
289937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() &&
290037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines         "tried to unbundle something which is not a bundle");
290137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
290237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Un-bundle: make single instructions out of the bundle.
290337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ScheduleData *BundleMember = Bundle;
290437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  while (BundleMember) {
290537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(BundleMember->FirstInBundle == Bundle && "corrupt bundle links");
290637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember->FirstInBundle = BundleMember;
290737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *Next = BundleMember->NextInBundle;
290837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember->NextInBundle = nullptr;
290937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember->UnscheduledDepsInBundle = BundleMember->UnscheduledDeps;
291037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (BundleMember->UnscheduledDepsInBundle == 0) {
291137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ReadyInsts.insert(BundleMember);
291237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
291337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BundleMember = Next;
291437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
291537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
291637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
2917f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarbool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
291837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (getScheduleData(V))
2919f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return true;
292037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  Instruction *I = dyn_cast<Instruction>(V);
292137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(I && "bundle member must be an instruction");
292237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
292337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (!ScheduleStart) {
292437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // It's the first instruction in the new region.
292537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    initScheduleData(I, I->getNextNode(), nullptr, nullptr);
292637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleStart = I;
292737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleEnd = I->getNextNode();
292837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
292937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DEBUG(dbgs() << "SLP:  initialize schedule region to " << *I << "\n");
2930f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return true;
293137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
293237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Search up and down at the same time, because we don't know if the new
293337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // instruction is above or below the existing scheduling region.
2934f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  BasicBlock::reverse_iterator UpIter(ScheduleStart->getIterator());
293537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BasicBlock::reverse_iterator UpperEnd = BB->rend();
293637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BasicBlock::iterator DownIter(ScheduleEnd);
293737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BasicBlock::iterator LowerEnd = BB->end();
293837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (;;) {
2939f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
2940f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      DEBUG(dbgs() << "SLP:  exceeded schedule region size limit\n");
2941f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      return false;
2942f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    }
2943f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
294437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (UpIter != UpperEnd) {
294537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (&*UpIter == I) {
294637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
294737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleStart = I;
294837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        DEBUG(dbgs() << "SLP:  extend schedule region start to " << *I << "\n");
2949f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        return true;
295037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
295137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      UpIter++;
295237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
295337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (DownIter != LowerEnd) {
295437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (&*DownIter == I) {
295537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion,
295637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                         nullptr);
295737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleEnd = I->getNextNode();
295837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
295937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        DEBUG(dbgs() << "SLP:  extend schedule region end to " << *I << "\n");
2960f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        return true;
296137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
296237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DownIter++;
296337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
296437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
296537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines           "instruction not found in block");
296637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
2967f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  return true;
296837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
296937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
297037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
297137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                                Instruction *ToI,
297237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                                ScheduleData *PrevLoadStore,
297337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                                ScheduleData *NextLoadStore) {
297437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ScheduleData *CurrentLoadStore = PrevLoadStore;
297537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) {
297637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *SD = ScheduleDataMap[I];
297737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (!SD) {
297837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Allocate a new ScheduleData for the instruction.
297937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (ChunkPos >= ChunkSize) {
298037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleDataChunks.push_back(
298137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            llvm::make_unique<ScheduleData[]>(ChunkSize));
298237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ChunkPos = 0;
298337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
298437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SD = &(ScheduleDataChunks.back()[ChunkPos++]);
298537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ScheduleDataMap[I] = SD;
298637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      SD->Inst = I;
298737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
298837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(!isInSchedulingRegion(SD) &&
298937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines           "new ScheduleData already in scheduling region");
299037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    SD->init(SchedulingRegionID);
299137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
299237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (I->mayReadOrWriteMemory()) {
299337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Update the linked list of memory accessing instructions.
299437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (CurrentLoadStore) {
299537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        CurrentLoadStore->NextLoadStore = SD;
299637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      } else {
299737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        FirstLoadStoreInRegion = SD;
299837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
299937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      CurrentLoadStore = SD;
300037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
300137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
300237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (NextLoadStore) {
300337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (CurrentLoadStore)
300437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      CurrentLoadStore->NextLoadStore = NextLoadStore;
300537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  } else {
300637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    LastLoadStoreInRegion = CurrentLoadStore;
300737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
300837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
300937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
301037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
301137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                                     bool InsertInReadyList,
3012ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                                                     BoUpSLP *SLP) {
301337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(SD->isSchedulingEntity());
301437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
301537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  SmallVector<ScheduleData *, 10> WorkList;
301637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  WorkList.push_back(SD);
301737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
301837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  while (!WorkList.empty()) {
301937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *SD = WorkList.back();
302037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    WorkList.pop_back();
302137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
302237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *BundleMember = SD;
302337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    while (BundleMember) {
302437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      assert(isInSchedulingRegion(BundleMember));
302537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (!BundleMember->hasValidDependencies()) {
302637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
302737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        DEBUG(dbgs() << "SLP:       update deps of " << *BundleMember << "\n");
302837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BundleMember->Dependencies = 0;
302937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BundleMember->resetUnscheduledDeps();
303037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
303137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Handle def-use chain dependencies.
303237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        for (User *U : BundleMember->Inst->users()) {
303337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (isa<Instruction>(U)) {
303437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            ScheduleData *UseSD = getScheduleData(U);
303537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
303637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              BundleMember->Dependencies++;
303737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              ScheduleData *DestBundle = UseSD->FirstInBundle;
303837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              if (!DestBundle->IsScheduled) {
303937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                BundleMember->incrementUnscheduledDeps(1);
304037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              }
304137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              if (!DestBundle->hasValidDependencies()) {
304237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                WorkList.push_back(DestBundle);
304337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              }
304437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            }
304537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          } else {
304637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // I'm not sure if this can ever happen. But we need to be safe.
3047f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar            // This lets the instruction/bundle never be scheduled and
3048f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar            // eventually disable vectorization.
304937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            BundleMember->Dependencies++;
305037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            BundleMember->incrementUnscheduledDeps(1);
305137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
305237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
305337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
305437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Handle the memory dependencies.
305537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        ScheduleData *DepDest = BundleMember->NextLoadStore;
305637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (DepDest) {
3057ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          Instruction *SrcInst = BundleMember->Inst;
3058f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar          MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA);
305937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
3060ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          unsigned numAliased = 0;
3061ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines          unsigned DistToSrc = 1;
306237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
306337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          while (DepDest) {
306437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            assert(isInSchedulingRegion(DepDest));
3065ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
3066ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // We have two limits to reduce the complexity:
3067ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // 1) AliasedCheckLimit: It's a small limit to reduce calls to
3068ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //    SLP->isAliased (which is the expensive part in this loop).
3069ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // 2) MaxMemDepDistance: It's for very large blocks and it aborts
3070ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //    the whole loop (even if the loop is fast, it's quadratic).
3071ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //    It's important for the loop break condition (see below) to
3072ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //    check this limit even between two read-only instructions.
3073ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            if (DistToSrc >= MaxMemDepDistance ||
3074ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                    ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
3075ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                     (numAliased >= AliasedCheckLimit ||
3076ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                      SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {
3077ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
3078ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              // We increment the counter only if the locations are aliased
3079ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              // (instead of counting all alias checks). This gives a better
3080ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              // balance between reduced runtime and accurate dependencies.
3081ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              numAliased++;
3082ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
3083ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              DepDest->MemoryDependencies.push_back(BundleMember);
3084ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              BundleMember->Dependencies++;
3085ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              ScheduleData *DestBundle = DepDest->FirstInBundle;
3086ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              if (!DestBundle->IsScheduled) {
3087ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                BundleMember->incrementUnscheduledDeps(1);
3088ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              }
3089ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines              if (!DestBundle->hasValidDependencies()) {
3090ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                WorkList.push_back(DestBundle);
309137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              }
309237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            }
309337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            DepDest = DepDest->NextLoadStore;
3094ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
3095ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // Example, explaining the loop break condition: Let's assume our
3096ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // starting instruction is i0 and MaxMemDepDistance = 3.
3097ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //
3098ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //                      +--------v--v--v
3099ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //             i0,i1,i2,i3,i4,i5,i6,i7,i8
3100ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //             +--------^--^--^
3101ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            //
3102ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // MaxMemDepDistance let us stop alias-checking at i3 and we add
3103ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // dependencies from i0 to i3,i4,.. (even if they are not aliased).
3104ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // Previously we already added dependencies from i3 to i6,i7,i8
3105ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // (because of MaxMemDepDistance). As we added a dependency from
3106ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8
3107ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            // and we can abort this loop at i6.
3108ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            if (DistToSrc >= 2 * MaxMemDepDistance)
3109ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines                break;
3110ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            DistToSrc++;
311137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
311237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
311337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
311437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      BundleMember = BundleMember->NextInBundle;
311537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
311637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (InsertInReadyList && SD->isReady()) {
311737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      ReadyInsts.push_back(SD);
311837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DEBUG(dbgs() << "SLP:     gets ready on update: " << *SD->Inst << "\n");
311937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
312037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
312137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
312237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
312337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::resetSchedule() {
312437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(ScheduleStart &&
312537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines         "tried to reset schedule on block which has not been scheduled");
312637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (Instruction *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
312737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *SD = getScheduleData(I);
312837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(isInSchedulingRegion(SD));
312937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    SD->IsScheduled = false;
313037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    SD->resetUnscheduledDeps();
313137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
313237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  ReadyInsts.clear();
313337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
313437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
313537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::scheduleBlock(BlockScheduling *BS) {
3136de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
313737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (!BS->ScheduleStart)
313837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return;
3139de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
314037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");
314137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
314237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BS->resetSchedule();
314337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
314437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // For the real scheduling we use a more sophisticated ready-list: it is
314537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // sorted by the original instruction location. This lets the final schedule
314637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // be as  close as possible to the original instruction order.
314737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  struct ScheduleDataCompare {
314837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool operator()(ScheduleData *SD1, ScheduleData *SD2) {
314937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      return SD2->SchedulingPriority < SD1->SchedulingPriority;
315037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
315137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  };
315237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;
315337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
3154f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // Ensure that all dependency data is updated and fill the ready-list with
315537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // initial instructions.
315637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int Idx = 0;
315737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  int NumToSchedule = 0;
315837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
315937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines       I = I->getNextNode()) {
316037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *SD = BS->getScheduleData(I);
316137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    assert(
316237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        SD->isPartOfBundle() == (ScalarToTreeEntry.count(SD->Inst) != 0) &&
316337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        "scheduler and vectorizer have different opinion on what is a bundle");
316437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    SD->FirstInBundle->SchedulingPriority = Idx++;
316537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (SD->isSchedulingEntity()) {
3166ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      BS->calculateDependencies(SD, false, this);
316737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      NumToSchedule++;
316837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
316937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
317037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BS->initialFillReadyList(ReadyInsts);
317137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
317237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  Instruction *LastScheduledInst = BS->ScheduleEnd;
317337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
317437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Do the "real" scheduling.
317537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  while (!ReadyInsts.empty()) {
317637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *picked = *ReadyInsts.begin();
317737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ReadyInsts.erase(ReadyInsts.begin());
317837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
317937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Move the scheduled instruction(s) to their dedicated places, if not
318037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // there yet.
318137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    ScheduleData *BundleMember = picked;
318237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    while (BundleMember) {
318337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Instruction *pickedInst = BundleMember->Inst;
318437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (LastScheduledInst->getNextNode() != pickedInst) {
318537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        BS->BB->getInstList().remove(pickedInst);
3186f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
3187f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                                     pickedInst);
318837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
318937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      LastScheduledInst = pickedInst;
319037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      BundleMember = BundleMember->NextInBundle;
319137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
319237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
319337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    BS->schedule(picked, ReadyInsts);
319437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    NumToSchedule--;
319537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
319637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(NumToSchedule == 0 && "could not schedule all instructions");
319737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
319837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Avoid duplicate scheduling of the block.
319937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  BS->ScheduleStart = nullptr;
320037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
320137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
3202de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned BoUpSLP::getVectorElementSize(Value *V) {
3203de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If V is a store, just return the width of the stored value without
3204de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // traversing the expression tree. This is the common case.
3205de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (auto *Store = dyn_cast<StoreInst>(V))
3206de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return DL->getTypeSizeInBits(Store->getValueOperand()->getType());
3207de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3208de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If V is not a store, we can traverse the expression tree to find loads
3209de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // that feed it. The type of the loaded value may indicate a more suitable
3210de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // width than V's type. We want to base the vector element size on the width
3211de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // of memory operations where possible.
3212de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  SmallVector<Instruction *, 16> Worklist;
3213de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  SmallPtrSet<Instruction *, 16> Visited;
3214de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (auto *I = dyn_cast<Instruction>(V))
3215de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Worklist.push_back(I);
3216de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3217de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Traverse the expression tree in bottom-up order looking for loads. If we
3218de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // encounter an instruciton we don't yet handle, we give up.
3219de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto MaxWidth = 0u;
3220de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto FoundUnknownInst = false;
3221de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  while (!Worklist.empty() && !FoundUnknownInst) {
3222de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *I = Worklist.pop_back_val();
3223de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Visited.insert(I);
3224de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3225de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // We should only be looking at scalar instructions here. If the current
3226de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // instruction has a vector type, give up.
3227de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *Ty = I->getType();
3228de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (isa<VectorType>(Ty))
3229de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      FoundUnknownInst = true;
3230de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3231de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // If the current instruction is a load, update MaxWidth to reflect the
3232de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // width of the loaded value.
3233de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    else if (isa<LoadInst>(I))
3234de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      MaxWidth = std::max<unsigned>(MaxWidth, DL->getTypeSizeInBits(Ty));
3235de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3236de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // Otherwise, we need to visit the operands of the instruction. We only
3237de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // handle the interesting cases from buildTree here. If an operand is an
3238de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // instruction we haven't yet visited, we add it to the worklist.
3239de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    else if (isa<PHINode>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
3240de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar             isa<CmpInst>(I) || isa<SelectInst>(I) || isa<BinaryOperator>(I)) {
3241de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      for (Use &U : I->operands())
3242de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (auto *J = dyn_cast<Instruction>(U.get()))
3243de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          if (!Visited.count(J))
3244de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            Worklist.push_back(J);
3245de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    }
32468383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3247de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // If we don't yet handle the instruction, give up.
3248de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    else
3249de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      FoundUnknownInst = true;
3250de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
32518383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3252de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If we didn't encounter a memory access in the expression tree, or if we
3253de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // gave up for some reason, just return the width of V.
3254de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (!MaxWidth || FoundUnknownInst)
3255de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return DL->getTypeSizeInBits(V->getType());
3256de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3257de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Otherwise, return the maximum width we found.
3258de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  return MaxWidth;
3259de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar}
3260de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3261de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// Determine if a value V in a vectorizable expression Expr can be demoted to a
3262de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// smaller type with a truncation. We collect the values that will be demoted
3263de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// in ToDemote and additional roots that require investigating in Roots.
3264de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
3265de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                  SmallVectorImpl<Value *> &ToDemote,
3266de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                  SmallVectorImpl<Value *> &Roots) {
3267de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3268de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // We can always demote constants.
3269de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (isa<Constant>(V)) {
3270de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    ToDemote.push_back(V);
3271de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return true;
32728383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  }
32738383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3274de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If the value is not an instruction in the expression with only one use, it
3275de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // cannot be demoted.
3276de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *I = dyn_cast<Instruction>(V);
3277de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (!I || !I->hasOneUse() || !Expr.count(I))
3278de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return false;
3279e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3280de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  switch (I->getOpcode()) {
3281de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3282de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // We can always demote truncations and extensions. Since truncations can
3283de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // seed additional demotion, we save the truncated value.
3284de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case Instruction::Trunc:
3285de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Roots.push_back(I->getOperand(0));
3286de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case Instruction::ZExt:
3287de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case Instruction::SExt:
3288de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    break;
3289de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3290de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // We can demote certain binary operations if we can demote both of their
3291de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // operands.
3292de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case Instruction::Add:
3293de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case Instruction::Sub:
3294de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case Instruction::Mul:
3295de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case Instruction::And:
3296de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case Instruction::Or:
3297de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case Instruction::Xor:
3298de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!collectValuesToDemote(I->getOperand(0), Expr, ToDemote, Roots) ||
3299de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        !collectValuesToDemote(I->getOperand(1), Expr, ToDemote, Roots))
330036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return false;
3301de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    break;
330236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
3303de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // We can demote selects if we can demote their true and false values.
3304de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case Instruction::Select: {
3305de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    SelectInst *SI = cast<SelectInst>(I);
3306de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!collectValuesToDemote(SI->getTrueValue(), Expr, ToDemote, Roots) ||
3307de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        !collectValuesToDemote(SI->getFalseValue(), Expr, ToDemote, Roots))
3308d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton      return false;
3309de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    break;
3310de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
3311d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton
3312de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // We can demote phis if we can demote all their incoming operands. Note that
3313de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // we don't need to worry about cycles since we ensure single use above.
3314de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case Instruction::PHI: {
3315de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    PHINode *PN = cast<PHINode>(I);
3316de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    for (Value *IncValue : PN->incoming_values())
3317de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (!collectValuesToDemote(IncValue, Expr, ToDemote, Roots))
3318de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        return false;
3319de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    break;
3320de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
3321f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
3322de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Otherwise, conservatively give up.
3323de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  default:
3324de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return false;
3325de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
33263202f6cdb9193fe5365462118f499f6e164a1738Nadav Rotem
3327de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Record the value that we can demote.
3328de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  ToDemote.push_back(V);
3329de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  return true;
3330de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar}
333109ec4b21648700f9d4ef5bc90d732f90f32c930cNadav Rotem
3332de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid BoUpSLP::computeMinimumValueSizes() {
3333de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If there are no external uses, the expression tree must be rooted by a
3334de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // store. We can't demote in-memory values, so there is nothing to do here.
3335de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (ExternalUses.empty())
3336de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return;
3337ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
3338de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // We only attempt to truncate integer expressions.
3339de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto &TreeRoot = VectorizableTree[0].Scalars;
3340de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
3341de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (!TreeRootIT)
3342de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return;
334353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
3344de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If the expression is not rooted by a store, these roots should have
3345de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // external uses. We will rely on InstCombine to rewrite the expression in
3346de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // the narrower type. However, InstCombine only rewrites single-use values.
3347de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // This means that if a tree entry other than a root is used externally, it
3348de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // must have multiple uses and InstCombine will not rewrite it. The code
3349de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // below ensures that only the roots are used externally.
3350de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  SmallPtrSet<Value *, 32> Expr(TreeRoot.begin(), TreeRoot.end());
3351de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (auto &EU : ExternalUses)
3352de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!Expr.erase(EU.Scalar))
3353de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      return;
3354de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (!Expr.empty())
3355de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return;
33566611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem
3357de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Collect the scalar values of the vectorizable expression. We will use this
3358de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // context to determine which values can be demoted. If we see a truncation,
3359de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // we mark it as seeding another demotion.
3360de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (auto &Entry : VectorizableTree)
3361de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Expr.insert(Entry.Scalars.begin(), Entry.Scalars.end());
3362e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3363de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Ensure the roots of the vectorizable tree don't form a cycle. They must
3364de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // have a single external user that is not in the vectorizable tree.
3365de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (auto *Root : TreeRoot)
3366de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!Root->hasOneUse() || Expr.count(*Root->user_begin()))
3367de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      return;
3368de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3369de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Conservatively determine if we can actually truncate the roots of the
3370de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // expression. Collect the values that can be demoted in ToDemote and
3371de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // additional roots that require investigating in Roots.
3372de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  SmallVector<Value *, 32> ToDemote;
3373de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  SmallVector<Value *, 4> Roots;
3374de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (auto *Root : TreeRoot)
3375de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
3376de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      return;
3377de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3378de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // The maximum bit width required to represent all the values that can be
3379de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // demoted without loss of precision. It would be safe to truncate the roots
3380de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // of the expression to this width.
3381de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto MaxBitWidth = 8u;
3382de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3383de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // We first check if all the bits of the roots are demanded. If they're not,
3384de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // we can truncate the roots to this narrower type.
3385de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (auto *Root : TreeRoot) {
3386de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto Mask = DB->getDemandedBits(cast<Instruction>(Root));
3387de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    MaxBitWidth = std::max<unsigned>(
3388de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        Mask.getBitWidth() - Mask.countLeadingZeros(), MaxBitWidth);
3389de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
3390de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3391de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If all the bits of the roots are demanded, we can try a little harder to
3392de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // compute a narrower type. This can happen, for example, if the roots are
3393de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // getelementptr indices. InstCombine promotes these indices to the pointer
3394de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // width. Thus, all their bits are technically demanded even though the
3395de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // address computation might be vectorized in a smaller type.
3396de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  //
3397de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // We start by looking at each entry that can be demoted. We compute the
3398de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // maximum bit width required to store the scalar by using ValueTracking to
3399de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // compute the number of high-order bits we can truncate.
3400de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType())) {
3401de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    MaxBitWidth = 8u;
3402de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    for (auto *Scalar : ToDemote) {
3403de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, 0, DT);
3404de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType());
3405de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth);
3406e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    }
3407de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
3408de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3409de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Round MaxBitWidth up to the next power-of-two.
3410de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (!isPowerOf2_64(MaxBitWidth))
3411de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    MaxBitWidth = NextPowerOf2(MaxBitWidth);
3412de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3413de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If the maximum bit width we compute is less than the width of the roots'
3414de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // type, we can proceed with the narrowing. Otherwise, do nothing.
3415de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (MaxBitWidth >= TreeRootIT->getBitWidth())
3416de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return;
3417de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3418de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If we can truncate the root, we must collect additional values that might
3419de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // be demoted as a result. That is, those seeded by truncations we will
3420de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // modify.
3421de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  while (!Roots.empty())
3422de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);
3423de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3424de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Finally, map the values we can demote to the maximum bit width we computed.
3425de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (auto *Scalar : ToDemote)
3426de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    MinBWs[Scalar] = MaxBitWidth;
3427de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar}
3428de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3429de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarnamespace {
3430de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// The SLPVectorizer Pass.
3431de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstruct SLPVectorizer : public FunctionPass {
3432de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  SLPVectorizerPass Impl;
3433de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3434de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// Pass identification, replacement for typeid
3435de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  static char ID;
3436de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3437de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  explicit SLPVectorizer() : FunctionPass(ID) {
3438de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
3439de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
3440de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3441de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3442de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  bool doInitialization(Module &M) override {
3443de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return false;
3444de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
3445de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3446de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  bool runOnFunction(Function &F) override {
3447de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (skipFunction(F))
3448de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      return false;
3449de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3450de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
3451de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3452de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
3453de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
3454de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3455de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3456de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3457de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3458de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
3459de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3460de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB);
3461e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  }
3462e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
346336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  void getAnalysisUsage(AnalysisUsage &AU) const override {
3464e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    FunctionPass::getAnalysisUsage(AU);
3465ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AU.addRequired<AssumptionCacheTracker>();
3466f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    AU.addRequired<ScalarEvolutionWrapperPass>();
3467f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    AU.addRequired<AAResultsWrapperPass>();
3468ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AU.addRequired<TargetTransformInfoWrapperPass>();
3469ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AU.addRequired<LoopInfoWrapperPass>();
347036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    AU.addRequired<DominatorTreeWrapperPass>();
3471de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    AU.addRequired<DemandedBitsWrapperPass>();
3472ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    AU.addPreserved<LoopInfoWrapperPass>();
347336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    AU.addPreserved<DominatorTreeWrapperPass>();
3474f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    AU.addPreserved<AAResultsWrapperPass>();
3475f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    AU.addPreserved<GlobalsAAWrapperPass>();
3476d4a9ebc7341a1ed066fcdff8e7e4e9cbf1bc4368Nadav Rotem    AU.setPreservesCFG();
3477e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  }
3478de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar};
3479de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} // end anonymous namespace
3480e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3481de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarPreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
3482de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
3483de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
3484de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
3485de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *AA = &AM.getResult<AAManager>(F);
3486de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *LI = &AM.getResult<LoopAnalysis>(F);
3487de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
3488de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
3489de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto *DB = &AM.getResult<DemandedBitsAnalysis>(F);
3490de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3491de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB);
3492de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (!Changed)
3493de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return PreservedAnalyses::all();
3494de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  PreservedAnalyses PA;
3495de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  PA.preserve<LoopAnalysis>();
3496de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  PA.preserve<DominatorTreeAnalysis>();
3497de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  PA.preserve<AAManager>();
3498de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  PA.preserve<GlobalsAA>();
3499de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  return PA;
3500de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar}
35018383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3502de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
3503de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                TargetTransformInfo *TTI_,
3504de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
3505de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                LoopInfo *LI_, DominatorTree *DT_,
3506de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                AssumptionCache *AC_, DemandedBits *DB_) {
3507de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  SE = SE_;
3508de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  TTI = TTI_;
3509de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  TLI = TLI_;
3510de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  AA = AA_;
3511de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  LI = LI_;
3512de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  DT = DT_;
3513de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  AC = AC_;
3514de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  DB = DB_;
3515de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  DL = &F.getParent()->getDataLayout();
3516de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3517de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  Stores.clear();
3518de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  GEPs.clear();
3519de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  bool Changed = false;
35208383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3521de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // If the target claims to have no vector registers don't attempt
3522de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // vectorization.
3523de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (!TTI->getNumberOfRegisters(true))
3524de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return false;
35258383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3526de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Don't vectorize when the attribute NoImplicitFloat is used.
3527de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (F.hasFnAttribute(Attribute::NoImplicitFloat))
3528de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return false;
3529444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem
3530de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
35318383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3532de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Use the bottom up slp vectorizer to construct chains that start with
3533de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // store instructions.
3534de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL);
3535d69d9f20bc3acee0fc233853745c1de015b541f2Nadav Rotem
3536de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
3537de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // delete instructions.
3538369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3539de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Scan the blocks in the function in post order.
3540de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (auto BB : post_order(&F.getEntryBlock())) {
3541de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    collectSeedInstructions(BB);
3542e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3543de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // Vectorize trees that end at stores.
3544de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!Stores.empty()) {
3545de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      DEBUG(dbgs() << "SLP: Found stores for " << Stores.size()
3546de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                   << " underlying objects.\n");
3547de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      Changed |= vectorizeStoreChains(R);
3548de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    }
3549de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3550de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // Vectorize trees that end at reductions.
3551de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Changed |= vectorizeChainsInBlock(BB, R);
3552de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3553de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // Vectorize the index computations of getelementptr instructions. This
3554de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // is primarily intended to catch gather-like idioms ending at
3555de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // non-consecutive loads.
3556de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!GEPs.empty()) {
3557de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size()
3558de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                   << " underlying objects.\n");
3559de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      Changed |= vectorizeGEPIndices(BB, R);
3560de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    }
3561de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
3562de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3563de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (Changed) {
3564de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    R.optimizeGatherSequence();
3565de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
3566de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    DEBUG(verifyFunction(F));
3567de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
3568de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  return Changed;
3569de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar}
3570e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
357136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// \brief Check that the Values in the slice in VL array are still existent in
3572dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// the WeakVH array.
3573dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// Vectorization of part of the VL array may cause later values in the VL array
3574dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// to become invalid. We track when this has happened in the WeakVH array.
35754c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainarstatic bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
35764c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                               unsigned SliceBegin, unsigned SliceSize) {
35774c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  VL = VL.slice(SliceBegin, SliceSize);
35784c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  VH = VH.slice(SliceBegin, SliceSize);
35794c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  return !std::equal(VL.begin(), VL.end(), VH.begin());
3580dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling}
3581dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
3582de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain,
3583de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                            int CostThreshold, BoUpSLP &R,
3584de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                            unsigned VecRegSize) {
3585369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  unsigned ChainLen = Chain.size();
3586369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
3587369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        << "\n");
3588de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned Sz = R.getVectorElementSize(Chain[0]);
3589f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  unsigned VF = VecRegSize / Sz;
3590369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3591369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  if (!isPowerOf2_32(Sz) || VF < 2)
3592369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    return false;
3593369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
359436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Keep track of values that were deleted by vectorizing in the loop below.
3595dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling  SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
3596dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
3597369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  bool Changed = false;
3598369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Look for profitable vectorizable trees at all offsets, starting at zero.
3599369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  for (unsigned i = 0, e = ChainLen; i < e; ++i) {
3600369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (i + VF > e)
3601369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      break;
3602dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
3603dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling    // Check that a previous iteration of this loop did not delete the Value.
3604dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling    if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
3605dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling      continue;
3606dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
3607369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
3608369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem          << "\n");
3609369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    ArrayRef<Value *> Operands = Chain.slice(i, VF);
3610369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3611369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    R.buildTree(Operands);
3612de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    R.computeMinimumValueSizes();
3613369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3614369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    int Cost = R.getTreeCost();
3615369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3616369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
3617369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (Cost < CostThreshold) {
3618369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
3619369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      R.vectorizeTree();
3620369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3621369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Move to the next bundle.
3622369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      i += VF - 1;
3623369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Changed = true;
3624369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
3625369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
3626369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
36278e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling  return Changed;
3628369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
3629369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3630de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
3631de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                        int costThreshold, BoUpSLP &R) {
36324c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  SetVector<StoreInst *> Heads, Tails;
36334c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
3634369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3635369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // We may run into multiple chains that merge into a single chain. We mark the
3636369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // stores that we vectorized so that we don't visit the same store twice.
3637369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  BoUpSLP::ValueSet VectorizedStores;
3638369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  bool Changed = false;
3639369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3640369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // Do a quadratic search on all of the given stores and find
36416611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem  // all of the pairs of stores that follow each other.
3642f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  SmallVector<unsigned, 16> IndexQueue;
364321508bf853354343266dbe6d830ff30bed006a68Nadav Rotem  for (unsigned i = 0, e = Stores.size(); i < e; ++i) {
3644f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    IndexQueue.clear();
3645f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // If a store has multiple consecutive store candidates, search Stores
3646f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // array according to the sequence: from i+1 to e, then from i-1 to 0.
3647f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // This is because usually pairing with immediate succeeding or preceding
3648f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // candidate create the best chance to find slp vectorization opportunity.
3649f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    unsigned j = 0;
3650f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    for (j = i + 1; j < e; ++j)
3651f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      IndexQueue.push_back(j);
3652f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    for (j = i; j > 0; --j)
3653f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      IndexQueue.push_back(j - 1);
3654f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
3655f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    for (auto &k : IndexQueue) {
3656de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (isConsecutiveAccess(Stores[i], Stores[k], *DL, *SE)) {
3657f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        Tails.insert(Stores[k]);
3658369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        Heads.insert(Stores[i]);
3659f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        ConsecutiveChain[Stores[i]] = Stores[k];
3660f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        break;
3661369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      }
3662369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
366321508bf853354343266dbe6d830ff30bed006a68Nadav Rotem  }
3664369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3665369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  // For stores that start but don't end a link in the chain:
36664c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
3667369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem       it != e; ++it) {
3668369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    if (Tails.count(*it))
3669369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      continue;
3670369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3671369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    // We found a store instr that starts a chain. Now follow the chain and try
3672369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    // to vectorize it.
3673369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    BoUpSLP::ValueList Operands;
36744c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    StoreInst *I = *it;
3675369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    // Collect the chain into a list.
3676369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    while (Tails.count(I) || Heads.count(I)) {
3677369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      if (VectorizedStores.count(I))
3678369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem        break;
3679369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      Operands.push_back(I);
3680369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      // Move to the next value in the chain.
3681369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem      I = ConsecutiveChain[I];
3682369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem    }
3683369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3684f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // FIXME: Is division-by-2 the correct step? Should we assert that the
3685f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // register size is a power-of-2?
3686de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    for (unsigned Size = R.getMaxVecRegSize(); Size >= R.getMinVecRegSize(); Size /= 2) {
3687f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      if (vectorizeStoreChain(Operands, costThreshold, R, Size)) {
3688f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        // Mark the vectorized stores so that we don't vectorize them again.
3689f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        VectorizedStores.insert(Operands.begin(), Operands.end());
3690f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        Changed = true;
3691f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        break;
3692f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      }
3693f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    }
3694369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  }
3695369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3696369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem  return Changed;
3697369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem}
3698369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3699de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
3700369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem
3701de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Initialize the collections. We will make a single pass over the block.
3702de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  Stores.clear();
3703de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  GEPs.clear();
3704fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer
3705de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Visit the store and getelementptr instructions in BB and organize them in
3706de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // Stores and GEPs according to the underlying objects of their pointer
3707de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // operands.
3708de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (Instruction &I : *BB) {
3709e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3710de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // Ignore store instructions that are volatile or have a pointer operand
3711de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // that doesn't point to a scalar type.
3712de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (auto *SI = dyn_cast<StoreInst>(&I)) {
3713de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (!SI->isSimple())
3714de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        continue;
3715de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (!isValidElementType(SI->getValueOperand()->getType()))
3716de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        continue;
3717de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      Stores[GetUnderlyingObject(SI->getPointerOperand(), *DL)].push_back(SI);
3718de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    }
3719e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3720de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // Ignore getelementptr instructions that have more than one index, a
3721de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // constant index, or a pointer operand that doesn't point to a scalar
3722de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // type.
3723de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
3724de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      auto Idx = GEP->idx_begin()->get();
3725de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (GEP->getNumIndices() > 1 || isa<Constant>(Idx))
3726de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        continue;
3727de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (!isValidElementType(Idx->getType()))
3728de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        continue;
3729de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (GEP->getType()->isVectorTy())
3730de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        continue;
3731de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      GEPs[GetUnderlyingObject(GEP->getPointerOperand(), *DL)].push_back(GEP);
3732de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    }
37338383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  }
3734e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem}
3735e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
3736de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
37370b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem  if (!A || !B)
37380b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem    return false;
37396fe5cc49d88c9dd48a1eefe4c1bdba1567b8eef2Benjamin Kramer  Value *VL[] = { A, B };
374037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return tryToVectorizeList(VL, R, None, true);
3741444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem}
3742444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem
3743de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
3744de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                           ArrayRef<Value *> BuildVector,
3745de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                           bool allowReorder) {
37465cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem  if (VL.size() < 2)
37475cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem    return false;
37485cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem
37490b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem  DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");
37504f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem
37515cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem  // Check that all of the parts are scalar instructions of the same type.
37525cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem  Instruction *I0 = dyn_cast<Instruction>(VL[0]);
37530b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem  if (!I0)
375489008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    return false;
37555cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem
37565cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem  unsigned Opcode0 = I0->getOpcode();
37578e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
3758f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // FIXME: Register size should be a parameter to this function, so we can
3759f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // try different vectorization factors.
3760de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned Sz = R.getVectorElementSize(I0);
3761de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned VF = R.getMinVecRegSize() / Sz;
37625cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem
3763f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  for (Value *V : VL) {
3764f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    Type *Ty = V->getType();
3765ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (!isValidElementType(Ty))
376689008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      return false;
3767f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    Instruction *Inst = dyn_cast<Instruction>(V);
37685cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem    if (!Inst || Inst->getOpcode() != Opcode0)
376989008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      return false;
37704f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem  }
37714f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem
377289008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang  bool Changed = false;
37738e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
3774dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Keep track of values that were deleted by vectorizing in the loop below.
3775dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling  SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
3776dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
377789008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
377889008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    unsigned OpsWidth = 0;
37798e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
37808e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling    if (i + VF > e)
378189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      OpsWidth = e - i;
378289008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    else
378389008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      OpsWidth = VF;
378489008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang
378589008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
378689008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      break;
3787d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem
3788dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling    // Check that a previous iteration of this loop did not delete the Value.
3789dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling    if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
3790dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling      continue;
3791dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling
37928e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling    DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
37938e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling                 << "\n");
379489008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
37958e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
3796dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    ArrayRef<Value *> BuildVectorSlice;
3797dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (!BuildVector.empty())
3798dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      BuildVectorSlice = BuildVector.slice(i, OpsWidth);
3799dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
3800dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    R.buildTree(Ops, BuildVectorSlice);
380137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // TODO: check if we can allow reordering also for other cases than
380237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // tryToVectorizePair()
380337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (allowReorder && R.shouldReorder()) {
380437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      assert(Ops.size() == 2);
380537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      assert(BuildVectorSlice.empty());
380637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Value *ReorderedOps[] = { Ops[1], Ops[0] };
380737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      R.buildTree(ReorderedOps, None);
380837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
3809de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    R.computeMinimumValueSizes();
381089008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    int Cost = R.getTreeCost();
38118e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
381289008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    if (Cost < -SLPCostThreshold) {
381336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
3814dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Value *VectorizedRoot = R.vectorizeTree();
3815dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
3816dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // Reconstruct the build vector by extracting the vectorized root. This
3817dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // way we handle the case where some elements of the vector are undefined.
3818dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      //  (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
3819dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (!BuildVectorSlice.empty()) {
3820dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // The insert point is the last build vector instruction. The vectorized
3821dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // root will precede it. This guarantees that we get an instruction. The
3822dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // vectorized tree could have been constant folded.
3823dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
3824dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        unsigned VecIdx = 0;
3825dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        for (auto &V : BuildVectorSlice) {
3826de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
3827de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                      ++BasicBlock::iterator(InsertAfter));
3828de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          Instruction *I = cast<Instruction>(V);
3829de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
3830dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
3831dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines              VectorizedRoot, Builder.getInt32(VecIdx++)));
3832de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          I->setOperand(1, Extract);
3833de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          I->removeFromParent();
3834de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          I->insertAfter(Extract);
3835de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          InsertAfter = I;
3836dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        }
3837dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      }
383889008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      // Move to the next bundle.
383989008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      i += VF - 1;
384089008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang      Changed = true;
384189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang    }
384289008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang  }
38438e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling
38448e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling  return Changed;
3845e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem}
38468383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
3847de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
38480b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem  if (!V)
38490b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem    return false;
385053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
3851e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  // Try to vectorize V.
3852e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
3853f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem    return true;
3854f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem
3855e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
3856e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
3857e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  // Try to skip B.
3858e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  if (B && B->hasOneUse()) {
3859e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
3860e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
3861e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (tryToVectorizePair(A, B0, R)) {
3862ab105ae95fc473c19d9f0b019fc7c7a16d17b1a5Nadav Rotem      return true;
3863f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem    }
3864e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (tryToVectorizePair(A, B1, R)) {
3865e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      return true;
3866f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem    }
3867f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem  }
3868f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem
38697fac0ef71cfaeafd91b9520b553d00d91f83a442Nadav Rotem  // Try to skip A.
3870e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  if (A && A->hasOneUse()) {
3871e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
3872e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
3873e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (tryToVectorizePair(A0, B, R)) {
3874e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      return true;
3875e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    }
3876e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (tryToVectorizePair(A1, B, R)) {
3877e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      return true;
3878f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem    }
3879f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem  }
3880e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  return 0;
3881e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem}
3882f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem
3883a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \brief Generate a shuffle mask to be used in a reduction tree.
3884a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///
3885a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param VecLen The length of the vector to be reduced.
3886a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param NumEltsToRdx The number of elements that should be reduced in the
3887a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///        vector.
3888a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param IsPairwise Whether the reduction is a pairwise or splitting
3889de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar///        reduction. A pairwise reduction will generate a mask of
3890a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///        <0,2,...> or <1,3,..> while a splitting reduction will generate
3891a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///        <2,3, undef,undef> for a vector of 4 and NumElts = 2.
3892a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param IsLeft True will generate a mask of even elements, odd otherwise.
3893a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferstatic Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
3894a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                                   bool IsPairwise, bool IsLeft,
3895a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                                   IRBuilder<> &Builder) {
3896a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask");
3897a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3898a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  SmallVector<Constant *, 32> ShuffleMask(
3899a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      VecLen, UndefValue::get(Builder.getInt32Ty()));
3900a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3901a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  if (IsPairwise)
3902a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // Build a mask of 0, 2, ... (left) or 1, 3, ... (right).
3903a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    for (unsigned i = 0; i != NumEltsToRdx; ++i)
3904a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft);
3905a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  else
3906a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // Move the upper half of the vector to the lower half.
3907a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    for (unsigned i = 0; i != NumEltsToRdx; ++i)
3908a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i);
3909a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3910a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  return ConstantVector::get(ShuffleMask);
3911a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer}
3912a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3913a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3914a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// Model horizontal reductions.
3915a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///
3916a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// A horizontal reduction is a tree of reduction operations (currently add and
3917a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// fadd) that has operations that can be put into a vector as its leaf.
3918a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// For example, this tree:
3919a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///
3920a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// mul mul mul mul
3921a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///  \  /    \  /
3922a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///   +       +
3923a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///    \     /
3924a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///       +
3925a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// This tree has "mul" as its reduced values and "+" as its reduction
3926a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// operations. A reduction might be feeding into a store or a binary operation
3927a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// feeding a phi.
3928a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///    ...
3929a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///    \  /
3930a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///     +
393138bf2d62b6be4b496e8f6d176578699b9c6e08bbArnold Schwaighofer///     |
3932a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///  phi +=
3933a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///
3934a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///  Or:
3935a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///    ...
3936a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///    \  /
3937a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///     +
393838bf2d62b6be4b496e8f6d176578699b9c6e08bbArnold Schwaighofer///     |
3939a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///   *p =
3940a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer///
3941a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferclass HorizontalReduction {
3942dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  SmallVector<Value *, 16> ReductionOps;
3943a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  SmallVector<Value *, 32> ReducedVals;
3944a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3945a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  BinaryOperator *ReductionRoot;
3946a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  PHINode *ReductionPHI;
3947a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3948a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// The opcode of the reduction.
3949a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  unsigned ReductionOpcode;
3950a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// The opcode of the values we perform a reduction on.
3951a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  unsigned ReducedValueOpcode;
3952a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// Should we model this reduction as a pairwise reduction tree or a tree that
3953a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// splits the vector in halves and adds those halves.
3954a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  bool IsPairwiseReduction;
3955a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3956a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferpublic:
3957f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  /// The width of one full horizontal reduction operation.
3958f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  unsigned ReduxWidth;
3959f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
3960de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// Minimal width of available vector registers. It's used to determine
3961de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  /// ReduxWidth.
3962de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  unsigned MinVecRegSize;
3963de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
3964de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  HorizontalReduction(unsigned MinVecRegSize)
3965de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
3966de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0),
3967de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        MinVecRegSize(MinVecRegSize) {}
3968a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3969a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// \brief Try to find a reduction tree.
39704c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
3971a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    assert((!Phi ||
3972a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer            std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
3973a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer           "Thi phi needs to use the binary operator");
3974a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3975a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // We could have a initial reductions that is not an add.
3976a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    //  r *= v1 + v2 + v3 + v4
3977a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // In such a case start looking for a tree rooted in the first '+'.
3978a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (Phi) {
3979a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (B->getOperand(0) == Phi) {
3980dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        Phi = nullptr;
3981a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        B = dyn_cast<BinaryOperator>(B->getOperand(1));
3982a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      } else if (B->getOperand(1) == Phi) {
3983dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        Phi = nullptr;
3984a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        B = dyn_cast<BinaryOperator>(B->getOperand(0));
3985a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      }
3986a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    }
3987a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3988a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (!B)
3989a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
3990a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
3991a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    Type *Ty = B->getType();
3992ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    if (!isValidElementType(Ty))
3993a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
3994a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
39954c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    const DataLayout &DL = B->getModule()->getDataLayout();
3996a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    ReductionOpcode = B->getOpcode();
3997a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    ReducedValueOpcode = 0;
3998f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // FIXME: Register size should be a parameter to this function, so we can
3999f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // try different vectorization factors.
40004c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
4001a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    ReductionRoot = B;
4002a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    ReductionPHI = Phi;
4003a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4004a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (ReduxWidth < 4)
4005a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
4006a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4007a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // We currently only support adds.
4008a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (ReductionOpcode != Instruction::Add &&
4009a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        ReductionOpcode != Instruction::FAdd)
4010a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
4011a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4012a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // Post order traverse the reduction tree starting at B. We only handle true
4013f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // trees containing only binary operators or selects.
4014f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
4015a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    Stack.push_back(std::make_pair(B, 0));
4016a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    while (!Stack.empty()) {
4017f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      Instruction *TreeN = Stack.back().first;
4018a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      unsigned EdgeToVist = Stack.back().second++;
4019a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;
4020a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4021a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Only handle trees in the current basic block.
4022a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (TreeN->getParent() != B->getParent())
4023a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        return false;
4024a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4025a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Each tree node needs to have one user except for the ultimate
4026a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // reduction.
4027a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (!TreeN->hasOneUse() && TreeN != B)
4028a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        return false;
4029a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4030a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Postorder vist.
4031a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (EdgeToVist == 2 || IsReducedValue) {
4032a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        if (IsReducedValue) {
4033a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          // Make sure that the opcodes of the operations that we are going to
4034a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          // reduce match.
4035a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          if (!ReducedValueOpcode)
4036a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer            ReducedValueOpcode = TreeN->getOpcode();
4037a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          else if (ReducedValueOpcode != TreeN->getOpcode())
4038a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer            return false;
4039a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          ReducedVals.push_back(TreeN);
4040a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        } else {
4041a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          // We need to be able to reassociate the adds.
4042a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          if (!TreeN->isAssociative())
4043a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer            return false;
4044dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          ReductionOps.push_back(TreeN);
4045a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        }
4046a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        // Retract.
4047a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Stack.pop_back();
4048a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        continue;
4049a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      }
4050a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4051a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Visit left or right.
4052a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      Value *NextV = TreeN->getOperand(EdgeToVist);
4053f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // We currently only allow BinaryOperator's and SelectInst's as reduction
4054f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      // values in our tree.
4055f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      if (isa<BinaryOperator>(NextV) || isa<SelectInst>(NextV))
4056f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        Stack.push_back(std::make_pair(cast<Instruction>(NextV), 0));
4057a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      else if (NextV != Phi)
4058a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        return false;
4059a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    }
4060a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    return true;
4061a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  }
4062a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4063a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// \brief Attempt to vectorize the tree found by
4064a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// matchAssociativeReduction.
4065a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
4066a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (ReducedVals.empty())
4067a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
4068a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4069a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    unsigned NumReducedVals = ReducedVals.size();
4070a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (NumReducedVals < ReduxWidth)
4071a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return false;
4072a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4073dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    Value *VectorizedTree = nullptr;
4074a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    IRBuilder<> Builder(ReductionRoot);
4075a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    FastMathFlags Unsafe;
4076a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    Unsafe.setUnsafeAlgebra();
4077de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    Builder.setFastMathFlags(Unsafe);
4078a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    unsigned i = 0;
4079a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4080a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
408137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      V.buildTree(makeArrayRef(&ReducedVals[i], ReduxWidth), ReductionOps);
4082de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      V.computeMinimumValueSizes();
4083a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4084a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Estimate cost.
4085a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
4086a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (Cost >= -SLPCostThreshold)
4087a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        break;
4088a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4089a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost
4090a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                   << ". (HorRdx)\n");
4091a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4092a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Vectorize a tree.
4093a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
4094a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      Value *VectorizedRoot = V.vectorizeTree();
4095a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4096a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Emit a reduction.
4097a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder);
4098a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (VectorizedTree) {
4099a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Builder.SetCurrentDebugLocation(Loc);
4100a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
4101a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                                     ReducedSubTree, "bin.rdx");
4102a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      } else
4103a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        VectorizedTree = ReducedSubTree;
4104a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    }
4105a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4106a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (VectorizedTree) {
4107a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Finish the reduction.
4108a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      for (; i < NumReducedVals; ++i) {
4109a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Builder.SetCurrentDebugLocation(
4110a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          cast<Instruction>(ReducedVals[i])->getDebugLoc());
4111a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
4112a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                                     ReducedVals[i]);
4113a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      }
4114a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Update users.
4115a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (ReductionPHI) {
4116dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        assert(ReductionRoot && "Need a reduction operation");
4117a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        ReductionRoot->setOperand(0, VectorizedTree);
4118a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        ReductionRoot->setOperand(1, ReductionPHI);
4119a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      } else
4120a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        ReductionRoot->replaceAllUsesWith(VectorizedTree);
4121a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    }
4122dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return VectorizedTree != nullptr;
4123a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  }
4124a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4125f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  unsigned numReductionValues() const {
4126f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return ReducedVals.size();
4127f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  }
4128a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4129f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarprivate:
4130f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  /// \brief Calculate the cost of a reduction.
4131a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) {
4132a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    Type *ScalarTy = FirstReducedVal->getType();
4133a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
4134a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4135a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    int PairwiseRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, true);
4136a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    int SplittingRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, false);
4137a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4138a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost;
4139a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
4140a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4141a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    int ScalarReduxCost =
4142a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy);
4143a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4144a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
4145a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                 << " for reduction that starts with " << *FirstReducedVal
4146a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                 << " (It is a "
4147a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                 << (IsPairwiseReduction ? "pairwise" : "splitting")
4148a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                 << " reduction)\n");
4149a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4150a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    return VecReduxCost - ScalarReduxCost;
4151a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  }
4152a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4153a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L,
4154a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                            Value *R, const Twine &Name = "") {
4155a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    if (Opcode == Instruction::FAdd)
4156a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      return Builder.CreateFAdd(L, R, Name);
4157a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name);
4158a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  }
4159a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4160a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  /// \brief Emit a horizontal reduction of the vectorized value.
4161a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) {
4162a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    assert(VectorizedValue && "Need to have a vectorized tree node");
4163a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    assert(isPowerOf2_32(ReduxWidth) &&
4164a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer           "We only handle power-of-two reductions for now");
4165a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4166ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    Value *TmpVec = VectorizedValue;
4167a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
4168a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      if (IsPairwiseReduction) {
4169a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *LeftMask =
4170a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          createRdxShuffleMask(ReduxWidth, i, true, true, Builder);
4171a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *RightMask =
4172a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          createRdxShuffleMask(ReduxWidth, i, true, false, Builder);
4173a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4174a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *LeftShuf = Builder.CreateShuffleVector(
4175a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l");
4176a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *RightShuf = Builder.CreateShuffleVector(
4177a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          TmpVec, UndefValue::get(TmpVec->getType()), (RightMask),
4178a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          "rdx.shuf.r");
4179a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf,
4180a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer                             "bin.rdx");
4181a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      } else {
4182a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *UpperHalf =
4183a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          createRdxShuffleMask(ReduxWidth, i, false, false, Builder);
4184a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Value *Shuf = Builder.CreateShuffleVector(
4185a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer          TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf");
4186a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx");
4187a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      }
4188a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    }
4189a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4190a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    // The result is in the first element of the vector.
4191a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer    return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
4192a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer  }
4193a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer};
4194a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
41951b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// \brief Recognize construction of vectors like
41961b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///  %ra = insertelement <4 x float> undef, float %s0, i32 0
41971b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///  %rb = insertelement <4 x float> %ra, float %s1, i32 1
41981b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///  %rc = insertelement <4 x float> %rb, float %s2, i32 2
41991b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///  %rd = insertelement <4 x float> %rc, float %s3, i32 3
42001b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///
42011b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// Returns true if it matches
42021b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault///
4203dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesstatic bool findBuildVector(InsertElementInst *FirstInsertElem,
4204dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                            SmallVectorImpl<Value *> &BuildVector,
4205dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                            SmallVectorImpl<Value *> &BuildVectorOpds) {
4206dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!isa<UndefValue>(FirstInsertElem->getOperand(0)))
42071b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    return false;
42081b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
4209dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  InsertElementInst *IE = FirstInsertElem;
42101b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  while (true) {
4211dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    BuildVector.push_back(IE);
4212dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    BuildVectorOpds.push_back(IE->getOperand(1));
42131b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
42141b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    if (IE->use_empty())
42151b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault      return false;
42161b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
421736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->user_back());
42181b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    if (!NextUse)
42191b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault      return true;
42201b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
42211b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    // If this isn't the final use, make sure the next insertelement is the only
42221b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    // use. It's OK if the final constructed vector is used multiple times
42231b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    if (!IE->hasOneUse())
42241b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault      return false;
42251b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
42261b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    IE = NextUse;
42271b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  }
42281b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
42291b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault  return false;
42301b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault}
42311b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
4232de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// \brief Like findBuildVector, but looks backwards for construction of aggregate.
4233de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar///
4234de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// \return true if it matches.
4235de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic bool findBuildAggregate(InsertValueInst *IV,
4236de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                               SmallVectorImpl<Value *> &BuildVector,
4237de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                               SmallVectorImpl<Value *> &BuildVectorOpds) {
4238de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (!IV->hasOneUse())
4239de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return false;
4240de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  Value *V = IV->getAggregateOperand();
4241de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  if (!isa<UndefValue>(V)) {
4242de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    InsertValueInst *I = dyn_cast<InsertValueInst>(V);
4243de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (!I || !findBuildAggregate(I, BuildVector, BuildVectorOpds))
4244de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      return false;
4245de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
4246de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  BuildVector.push_back(IV);
4247de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  BuildVectorOpds.push_back(IV->getInsertedValueOperand());
4248de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  return true;
4249de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar}
4250de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
425124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighoferstatic bool PhiTypeSorterFunc(Value *V, Value *V2) {
425224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer  return V->getType() < V2->getType();
425324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer}
425424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer
4255f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \brief Try and get a reduction value from a phi node.
4256f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar///
4257f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Given a phi node \p P in a block \p ParentBB, consider possible reductions
4258f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// if they come from either \p ParentBB or a containing loop latch.
4259f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar///
4260f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \returns A candidate reduction value if possible, or \code nullptr \endcode
4261f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// if not possible.
4262f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic Value *getReductionValue(const DominatorTree *DT, PHINode *P,
4263f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar                                BasicBlock *ParentBB, LoopInfo *LI) {
4264f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // There are situations where the reduction value is not dominated by the
4265f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // reduction phi. Vectorizing such cases has been reported to cause
4266f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // miscompiles. See PR25787.
4267f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  auto DominatedReduxValue = [&](Value *R) {
4268f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return (
4269f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        dyn_cast<Instruction>(R) &&
4270f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar        DT->dominates(P->getParent(), dyn_cast<Instruction>(R)->getParent()));
4271f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  };
4272f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4273f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  Value *Rdx = nullptr;
4274f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4275f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // Return the incoming value if it comes from the same BB as the phi node.
4276f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (P->getIncomingBlock(0) == ParentBB) {
4277f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    Rdx = P->getIncomingValue(0);
4278f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  } else if (P->getIncomingBlock(1) == ParentBB) {
4279f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    Rdx = P->getIncomingValue(1);
4280f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  }
4281f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4282f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (Rdx && DominatedReduxValue(Rdx))
4283f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return Rdx;
4284f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4285f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // Otherwise, check whether we have a loop latch to look at.
4286f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  Loop *BBL = LI->getLoopFor(ParentBB);
4287f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (!BBL)
4288f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return nullptr;
4289f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  BasicBlock *BBLatch = BBL->getLoopLatch();
4290f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (!BBLatch)
4291f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return nullptr;
4292f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4293f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // There is a loop latch, return the incoming value if it comes from
4294f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // that. This reduction pattern occassionaly turns up.
4295f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (P->getIncomingBlock(0) == BBLatch) {
4296f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    Rdx = P->getIncomingValue(0);
4297f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  } else if (P->getIncomingBlock(1) == BBLatch) {
4298f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    Rdx = P->getIncomingValue(1);
4299f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  }
4300f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4301f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (Rdx && DominatedReduxValue(Rdx))
4302f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return Rdx;
4303f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4304f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  return nullptr;
4305f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar}
4306f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4307f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \brief Attempt to reduce a horizontal reduction.
4308f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// If it is legal to match a horizontal reduction feeding
4309f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// the phi node P with reduction operators BI, then check if it
4310f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// can be done.
4311f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \returns true if a horizontal reduction was matched and reduced.
4312f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \returns false if a horizontal reduction was not matched.
4313f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI,
4314de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                        BoUpSLP &R, TargetTransformInfo *TTI,
4315de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                        unsigned MinRegSize) {
4316f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (!ShouldVectorizeHor)
4317f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return false;
4318f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4319de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  HorizontalReduction HorRdx(MinRegSize);
4320f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  if (!HorRdx.matchAssociativeReduction(P, BI))
4321f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    return false;
4322f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4323f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // If there is a sufficient number of reduction values, reduce
4324f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // to a nearby power-of-2. Can safely generate oversized
4325f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  // vectors and rely on the backend to split them to legal sizes.
4326f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  HorRdx.ReduxWidth =
4327f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
4328f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4329f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar  return HorRdx.tryToReduce(R, TTI);
4330f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar}
4331f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4332de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
4333e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  bool Changed = false;
4334931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem  SmallVector<Value *, 4> Incoming;
433524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer  SmallSet<Value *, 16> VisitedInstrs;
433624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer
433724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer  bool HaveVectorizedPhiNodes = true;
433824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer  while (HaveVectorizedPhiNodes) {
433924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    HaveVectorizedPhiNodes = false;
434024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer
434124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    // Collect the incoming values from the PHIs.
434224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    Incoming.clear();
4343de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    for (Instruction &I : *BB) {
4344de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      PHINode *P = dyn_cast<PHINode>(&I);
434524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      if (!P)
434624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        break;
434716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
434824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      if (!VisitedInstrs.count(P))
434924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        Incoming.push_back(P);
435024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    }
4351931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem
435224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    // Sort by type.
435324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc);
4354931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem
435524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    // Try to vectorize elements base on their type.
435624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer    for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
435724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer                                           E = Incoming.end();
435824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer         IncIt != E;) {
435924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer
436024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      // Look for the next elements with the same type.
436124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
436224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      while (SameTypeIt != E &&
436324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer             (*SameTypeIt)->getType() == (*IncIt)->getType()) {
436424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        VisitedInstrs.insert(*SameTypeIt);
436524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        ++SameTypeIt;
436624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      }
436716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
436824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      // Try to vectorize them.
436924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      unsigned NumElts = (SameTypeIt - IncIt);
437024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
437137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) {
437224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        // Success start over because instructions might have been changed.
437324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        HaveVectorizedPhiNodes = true;
437416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        Changed = true;
437524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer        break;
437616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      }
437716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
437836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Start over at the next instruction of a different type (or the end).
437924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer      IncIt = SameTypeIt;
4380931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem    }
4381931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem  }
4382931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem
438316a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer  VisitedInstrs.clear();
438416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
438516a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
438616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer    // We may go through BB multiple times so skip the one we have checked.
4387f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    if (!VisitedInstrs.insert(&*it).second)
438816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      continue;
438916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
439016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer    if (isa<DbgInfoIntrinsic>(it))
43910b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem      continue;
4392e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem
4393e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    // Try to vectorize reductions that use PHINodes.
439416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer    if (PHINode *P = dyn_cast<PHINode>(it)) {
4395e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      // Check that the PHI is a reduction PHI.
43960b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem      if (P->getNumIncomingValues() != 2)
43970b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem        return Changed;
4398f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4399f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar      Value *Rdx = getReductionValue(DT, P, BB, LI);
4400f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar
4401e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      // Check if this is a Binary Operator.
4402e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
4403e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      if (!BI)
44048383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem        continue;
4405196ee11f85ce0148d2c2e33fbe1f1171ac5a8828Nadav Rotem
4406a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      // Try to match and vectorize a horizontal reduction.
4407de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (canMatchHorizontalReduction(P, BI, R, TTI, R.getMinVecRegSize())) {
4408a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        Changed = true;
4409a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        it = BB->begin();
4410a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        e = BB->end();
4411a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        continue;
4412a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer      }
4413a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4414a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer     Value *Inst = BI->getOperand(0);
44150b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem      if (Inst == P)
44160b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem        Inst = BI->getOperand(1);
441753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem
441816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
441916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        // We would like to start over since some instructions are deleted
442016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        // and the iterator may become invalid value.
442116a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        Changed = true;
442216a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        it = BB->begin();
442316a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        e = BB->end();
4424a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        continue;
442516a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      }
4426a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
4427e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      continue;
4428e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    }
4429196ee11f85ce0148d2c2e33fbe1f1171ac5a8828Nadav Rotem
44309660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer    if (ShouldStartVectorizeHorAtStore)
44319660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer      if (StoreInst *SI = dyn_cast<StoreInst>(it))
44329660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer        if (BinaryOperator *BinOp =
44339660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer                dyn_cast<BinaryOperator>(SI->getValueOperand())) {
4434de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
4435de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                                          R.getMinVecRegSize()) ||
4436f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar              tryToVectorize(BinOp, R)) {
44379660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer            Changed = true;
44389660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer            it = BB->begin();
44399660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer            e = BB->end();
44409660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer            continue;
44419660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer          }
4442a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer        }
4443a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer
444437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Try to vectorize horizontal reductions feeding into a return.
444537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
444637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (RI->getNumOperands() != 0)
444737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (BinaryOperator *BinOp =
444837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                dyn_cast<BinaryOperator>(RI->getOperand(0))) {
444937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
445037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (tryToVectorizePair(BinOp->getOperand(0),
445137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                 BinOp->getOperand(1), R)) {
445237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            Changed = true;
445337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            it = BB->begin();
445437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            e = BB->end();
445537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            continue;
445637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
445737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
445837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
4459e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    // Try to vectorize trees that start at compare instructions.
446016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer    if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
4461e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
446216a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        Changed = true;
446316a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        // We would like to start over since some instructions are deleted
446416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        // and the iterator may become invalid value.
446516a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        it = BB->begin();
446616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer        e = BB->end();
4467e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem        continue;
4468e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      }
446916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer
447016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      for (int i = 0; i < 2; ++i) {
447137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
447237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
447337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            Changed = true;
447437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // We would like to start over since some instructions are deleted
447537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            // and the iterator may become invalid value.
447637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            it = BB->begin();
447737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            e = BB->end();
4478ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines            break;
447937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          }
448037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
448116a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer      }
4482e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      continue;
44838383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem    }
44841b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
44851b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    // Try to vectorize trees that start at insertelement instructions.
4486dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
4487dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      SmallVector<Value *, 16> BuildVector;
4488dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      SmallVector<Value *, 16> BuildVectorOpds;
4489dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
44901b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault        continue;
44911b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
4492dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // Vectorize starting with the build vector operands ignoring the
4493dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // BuildVector instructions for the purpose of scheduling and user
4494dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // extraction.
4495dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {
44961b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault        Changed = true;
44971b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault        it = BB->begin();
44981b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault        e = BB->end();
44991b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault      }
45001b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault
45011b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault      continue;
45021b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault    }
4503de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4504de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // Try to vectorize trees that start at insertvalue instructions feeding into
4505de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // a store.
4506de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (StoreInst *SI = dyn_cast<StoreInst>(it)) {
4507de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (InsertValueInst *LastInsertValue = dyn_cast<InsertValueInst>(SI->getValueOperand())) {
4508de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        const DataLayout &DL = BB->getModule()->getDataLayout();
4509de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (R.canMapToVector(SI->getValueOperand()->getType(), DL)) {
4510de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          SmallVector<Value *, 16> BuildVector;
4511de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          SmallVector<Value *, 16> BuildVectorOpds;
4512de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          if (!findBuildAggregate(LastInsertValue, BuildVector, BuildVectorOpds))
4513de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            continue;
4514de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4515de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          DEBUG(dbgs() << "SLP: store of array mappable to vector: " << *SI << "\n");
4516de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          if (tryToVectorizeList(BuildVectorOpds, R, BuildVector, false)) {
4517de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            Changed = true;
4518de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            it = BB->begin();
4519de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            e = BB->end();
4520de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          }
4521de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          continue;
4522de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        }
4523de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      }
4524de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    }
45258383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  }
45268383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
4527e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  return Changed;
4528e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem}
45298383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
4530de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
4531de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  auto Changed = false;
4532de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (auto &Entry : GEPs) {
4533de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4534de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // If the getelementptr list has fewer than two elements, there's nothing
4535de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // to do.
4536de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    if (Entry.second.size() < 2)
4537de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      continue;
4538de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4539de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    DEBUG(dbgs() << "SLP: Analyzing a getelementptr list of length "
4540de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar                 << Entry.second.size() << ".\n");
4541de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4542de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // We process the getelementptr list in chunks of 16 (like we do for
4543de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    // stores) to minimize compile-time.
4544de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += 16) {
4545de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      auto Len = std::min<unsigned>(BE - BI, 16);
4546de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      auto GEPList = makeArrayRef(&Entry.second[BI], Len);
4547de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4548de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // Initialize a set a candidate getelementptrs. Note that we use a
4549de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // SetVector here to preserve program order. If the index computations
4550de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // are vectorizable and begin with loads, we want to minimize the chance
4551de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // of having to reorder them later.
4552de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
4553de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4554de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // Some of the candidates may have already been vectorized after we
4555de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // initially collected them. If so, the WeakVHs will have nullified the
4556de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // values, so remove them from the set of candidates.
4557de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      Candidates.remove(nullptr);
4558de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4559de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // Remove from the set of candidates all pairs of getelementptrs with
4560de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // constant differences. Such getelementptrs are likely not good
4561de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // candidates for vectorization in a bottom-up phase since one can be
4562de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // computed from the other. We also ensure all candidate getelementptr
4563de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // indices are unique.
4564de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) {
4565de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        auto *GEPI = cast<GetElementPtrInst>(GEPList[I]);
4566de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        if (!Candidates.count(GEPI))
4567de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          continue;
4568de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        auto *SCEVI = SE->getSCEV(GEPList[I]);
4569de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        for (int J = I + 1; J < E && Candidates.size() > 1; ++J) {
4570de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          auto *GEPJ = cast<GetElementPtrInst>(GEPList[J]);
4571de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          auto *SCEVJ = SE->getSCEV(GEPList[J]);
4572de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) {
4573de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            Candidates.remove(GEPList[I]);
4574de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            Candidates.remove(GEPList[J]);
4575de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) {
4576de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar            Candidates.remove(GEPList[J]);
4577de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar          }
4578de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        }
4579de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      }
4580de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4581de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // We break out of the above computation as soon as we know there are
4582de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // fewer than two candidates remaining.
4583de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      if (Candidates.size() < 2)
4584de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        continue;
4585de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4586de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // Add the single, non-constant index of each candidate to the bundle. We
4587de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // ensured the indices met these constraints when we originally collected
4588de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // the getelementptrs.
4589de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      SmallVector<Value *, 16> Bundle(Candidates.size());
4590de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      auto BundleIndex = 0u;
4591de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      for (auto *V : Candidates) {
4592de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        auto *GEP = cast<GetElementPtrInst>(V);
4593de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        auto *GEPIdx = GEP->idx_begin()->get();
4594de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx));
4595de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar        Bundle[BundleIndex++] = GEPIdx;
4596de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      }
4597de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4598de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // Try and vectorize the indices. We are currently only interested in
4599de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // gather-like cases of the form:
4600de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      //
4601de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // ... = g[a[0] - b[0]] + g[a[1] - b[1]] + ...
4602de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      //
4603de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // where the loads of "a", the loads of "b", and the subtractions can be
4604de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // performed in parallel. It's likely that detecting this pattern in a
4605de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // bottom-up phase will be simpler and less costly than building a
4606de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      // full-blown top-down phase beginning at the consecutive loads.
4607de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar      Changed |= tryToVectorizeList(Bundle, R);
4608de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    }
4609de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  }
4610de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  return Changed;
4611de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar}
4612de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
4613de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
4614e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  bool Changed = false;
4615e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  // Attempt to sort and vectorize each of the store-groups.
4616de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  for (StoreListMap::iterator it = Stores.begin(), e = Stores.end(); it != e;
4617de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar       ++it) {
4618e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem    if (it->second.size() < 2)
4619e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem      continue;
4620f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem
46210b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem    DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
462221508bf853354343266dbe6d830ff30bed006a68Nadav Rotem          << it->second.size() << ".\n");
46238383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
462421508bf853354343266dbe6d830ff30bed006a68Nadav Rotem    // Process the stores in chunks of 16.
4625f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    // TODO: The limit of 16 inhibits greater vectorization factors.
4626f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    //       For example, AVX2 supports v32i8. Increasing this limit, however,
4627f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar    //       may cause a significant compile-time increase.
462821508bf853354343266dbe6d830ff30bed006a68Nadav Rotem    for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) {
462921508bf853354343266dbe6d830ff30bed006a68Nadav Rotem      unsigned Len = std::min<unsigned>(CE - CI, 16);
463037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len),
463137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                 -SLPCostThreshold, R);
463221508bf853354343266dbe6d830ff30bed006a68Nadav Rotem    }
46338383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem  }
4634e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem  return Changed;
4635e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem}
46368383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
46378383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemchar SLPVectorizer::ID = 0;
46388383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemstatic const char lv_name[] = "SLP Vectorizer";
46398383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
4640f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarINITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
4641ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesINITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
4642ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesINITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
4643f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarINITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
46448383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_DEPENDENCY(LoopSimplify)
4645de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarINITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
46468383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
46478383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem
46488383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemnamespace llvm {
46490b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav RotemPass *createSLPVectorizerPass() { return new SLPVectorizer(); }
46508383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem}
4651