SLPVectorizer.cpp revision 0c7f116bb6950ef819323d855415b2f2b0aad987
18383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===// 28383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// 38383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// The LLVM Compiler Infrastructure 48383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// 58383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// This file is distributed under the University of Illinois Open Source 68383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// License. See LICENSE.TXT for details. 78383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// 88383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===----------------------------------------------------------------------===// 98383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// This pass implements the Bottom Up SLP vectorizer. It detects consecutive 108383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// stores that can be put together into vector-stores. Next, it attempts to 118383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// construct vectorizable tree using the use-def chains. If a profitable tree 128383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// was found, the SLP vectorizer performs vectorization on the tree. 138383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// 148383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// The pass is inspired by the work described in the paper: 158383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. 
168383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// 178383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===----------------------------------------------------------------------===// 188383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Transforms/Vectorize.h" 193f75c6cfb575917c8c112b2de9593cb860f79e56Nadav Rotem#include "llvm/ADT/MapVector.h" 204c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar#include "llvm/ADT/Optional.h" 216959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem#include "llvm/ADT/PostOrderIterator.h" 2253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include "llvm/ADT/SetVector.h" 2337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/ADT/Statistic.h" 248383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Analysis/AliasAnalysis.h" 25ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines#include "llvm/Analysis/AssumptionCache.h" 2637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Analysis/CodeMetrics.h" 2736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Analysis/LoopInfo.h" 288383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Analysis/ScalarEvolution.h" 2953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include "llvm/Analysis/ScalarEvolutionExpressions.h" 308383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Analysis/TargetTransformInfo.h" 316623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer#include "llvm/Analysis/ValueTracking.h" 328383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/DataLayout.h" 3336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Dominators.h" 3436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/IRBuilder.h" 358383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Instructions.h" 36f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem#include "llvm/IR/IntrinsicInst.h" 378383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include 
"llvm/IR/Module.h" 38dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/NoFolder.h" 398383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Type.h" 408383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Value.h" 4136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Verifier.h" 428383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Pass.h" 438383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/CommandLine.h" 448383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/Debug.h" 458383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/raw_ostream.h" 46dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/Transforms/Utils/VectorUtils.h" 4753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include <algorithm> 488383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include <map> 4937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include <memory> 508383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 518383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemusing namespace llvm; 528383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 53dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define SV_NAME "slp-vectorizer" 54dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "SLP" 55dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 5637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen HinesSTATISTIC(NumVectorInstructions, "Number of vector instructions generated"); 5737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 588383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemstatic cl::opt<int> 590b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, 6008e20fbea18922e9f5f1cfb0ea7931c90006100eNadav Rotem cl::desc("Only vectorize if you gain more than this " 6108e20fbea18922e9f5f1cfb0ea7931c90006100eNadav Rotem "number ")); 
62a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 63a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferstatic cl::opt<bool> 64a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold SchwaighoferShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden, 65a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer cl::desc("Attempt to vectorize horizontal reductions")); 66a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 679660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighoferstatic cl::opt<bool> ShouldStartVectorizeHorAtStore( 689660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer "slp-vectorize-hor-store", cl::init(false), cl::Hidden, 699660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer cl::desc( 709660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer "Attempt to vectorize horizontal reductions feeding into a store")); 719660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer 728383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemnamespace { 738383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 74b8f54d86f28f84103a5e8dff5d3f3a3b493aaaa7Craig Topperstatic const unsigned MinVecRegSize = 128; 7553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 7625961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotemstatic const unsigned RecursionMaxDepth = 12; 7753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 78ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Limit the number of alias checks. The limit is chosen so that 79ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// it has no negative effect on the llvm benchmarks. 80ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic const unsigned AliasedCheckLimit = 10; 81ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 82ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Another limit for the alias checks: The maximum distance between load/store 83ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// instructions where alias checks are done. 
84ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// This limit is useful for very large basic blocks. 85ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic const unsigned MaxMemDepDistance = 160; 86ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 87ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \brief Predicate for the element types that the SLP vectorizer supports. 88ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// 89ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// The most important thing to filter here are types which are invalid in LLVM 90ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// vectors. We also filter target specific types which have absolutely no 91ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// meaningful vectorization path such as x86_fp80 and ppc_f128. This just 92ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// avoids spending time checking the cost model and realizing that they will 93ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// be inevitably scalarized. 94ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic bool isValidElementType(Type *Ty) { 95ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() && 96ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines !Ty->isPPC_FP128Ty(); 97ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 98ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 99369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns the parent basic block if all of the instructions in \p VL 100369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// are in the same block or null otherwise. 
101369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic BasicBlock *getSameBlock(ArrayRef<Value *> VL) { 102369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Instruction *I0 = dyn_cast<Instruction>(VL[0]); 103369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!I0) 104dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 105369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BasicBlock *BB = I0->getParent(); 106369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 1, e = VL.size(); i < e; i++) { 107369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Instruction *I = dyn_cast<Instruction>(VL[i]); 108369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!I) 109dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 110369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 111369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (BB != I->getParent()) 112dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 113369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 114369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return BB; 115369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 116369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 117369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns True if all of the values in \p VL are constants. 118369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic bool allConstant(ArrayRef<Value *> VL) { 119369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL.size(); i < e; ++i) 120369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!isa<Constant>(VL[i])) 121369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return false; 122369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return true; 123369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 124369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 125369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns True if all of the values in \p VL are identical. 
126369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic bool isSplat(ArrayRef<Value *> VL) { 127369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 1, e = VL.size(); i < e; ++i) 128369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (VL[i] != VL[0]) 129369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return false; 130369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return true; 131369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 132369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 133c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines///\returns Opcode that can be clubbed with \p Op to create an alternate 134c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// sequence which can later be merged as a ShuffleVector instruction. 135c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic unsigned getAltOpcode(unsigned Op) { 136c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines switch (Op) { 137c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::FAdd: 138c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Instruction::FSub; 139c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::FSub: 140c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Instruction::FAdd; 141c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::Add: 142c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Instruction::Sub; 143c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::Sub: 144c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Instruction::Add; 145c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines default: 146c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return 0; 147c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 148c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines} 149c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 150c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines///\returns bool representing if 
Opcode \p Op can be part 151c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// of an alternate sequence which can later be merged as 152c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// a ShuffleVector instruction. 153c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic bool canCombineAsAltInst(unsigned Op) { 154c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Op == Instruction::FAdd || Op == Instruction::FSub || 155c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Op == Instruction::Sub || Op == Instruction::Add) 156c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return true; 157c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return false; 158c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines} 159c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 160c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// \returns ShuffleVector instruction if intructions in \p VL have 161c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence. 162c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...) 163c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic unsigned isAltInst(ArrayRef<Value *> VL) { 164c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *I0 = dyn_cast<Instruction>(VL[0]); 165c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned Opcode = I0->getOpcode(); 166c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned AltOpcode = getAltOpcode(Opcode); 167c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (int i = 1, e = VL.size(); i < e; i++) { 168c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *I = dyn_cast<Instruction>(VL[i]); 169c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (!I || I->getOpcode() != ((i & 1) ? 
AltOpcode : Opcode)) 170c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return 0; 171c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 172c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Instruction::ShuffleVector; 173c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines} 174c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 175369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns The opcode if all of the Instructions in \p VL have the same 176369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// opcode, or zero. 177369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic unsigned getSameOpcode(ArrayRef<Value *> VL) { 178369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Instruction *I0 = dyn_cast<Instruction>(VL[0]); 179369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!I0) 180369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return 0; 181369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned Opcode = I0->getOpcode(); 182369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 1, e = VL.size(); i < e; i++) { 183369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Instruction *I = dyn_cast<Instruction>(VL[i]); 184c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (!I || Opcode != I->getOpcode()) { 185c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (canCombineAsAltInst(Opcode) && i == 1) 186c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return isAltInst(VL); 187369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return 0; 188c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Opcode; 191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 192369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 19337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Get the intersection (logical and) of all of the potential IR flags 
19437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// of each scalar operation (VL) that will be converted into a vector (I). 19537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Flag set: NSW, NUW, exact, and all of fast-math. 19637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic void propagateIRFlags(Value *I, ArrayRef<Value *> VL) { 19737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (auto *VecOp = dyn_cast<BinaryOperator>(I)) { 19837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (auto *Intersection = dyn_cast<BinaryOperator>(VL[0])) { 19937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Intersection is initialized to the 0th scalar, 20037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // so start counting from index '1'. 20137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (int i = 1, e = VL.size(); i < e; ++i) { 20237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (auto *Scalar = dyn_cast<BinaryOperator>(VL[i])) 20337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Intersection->andIRFlags(Scalar); 20437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 20537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines VecOp->copyIRFlags(Intersection); 20637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 20737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 20837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 20937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 210fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling/// \returns \p I after propagating metadata from \p VL. 
211fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendlingstatic Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) { 212fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling Instruction *I0 = cast<Instruction>(VL[0]); 213fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata; 214fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling I0->getAllMetadataOtherThanDebugLoc(Metadata); 215fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling 216fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling for (unsigned i = 0, n = Metadata.size(); i != n; ++i) { 217fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling unsigned Kind = Metadata[i].first; 218fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling MDNode *MD = Metadata[i].second; 219fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling 220fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling for (int i = 1, e = VL.size(); MD && i != e; i++) { 221fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling Instruction *I = cast<Instruction>(VL[i]); 222fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling MDNode *IMD = I->getMetadata(Kind); 223fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling 224fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling switch (Kind) { 225fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling default: 226dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MD = nullptr; // Remove unknown metadata 227fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling break; 228fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling case LLVMContext::MD_tbaa: 229fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling MD = MDNode::getMostGenericTBAA(MD, IMD); 230fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling break; 23137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case LLVMContext::MD_alias_scope: 232ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines MD = MDNode::getMostGenericAliasScope(MD, IMD); 
233ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 23437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case LLVMContext::MD_noalias: 23537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MD = MDNode::intersect(MD, IMD); 23637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines break; 237fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling case LLVMContext::MD_fpmath: 238fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling MD = MDNode::getMostGenericFPMath(MD, IMD); 239fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling break; 240fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling } 241fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling } 242fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling I->setMetadata(Kind, MD); 243fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling } 244fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling return I; 245fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling} 246fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling 247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns The type that all of the values in \p VL have or null if there 248369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// are different types. 
249369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic Type* getSameType(ArrayRef<Value *> VL) { 250369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *Ty = VL[0]->getType(); 251369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 1, e = VL.size(); i < e; i++) 25230bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem if (VL[i]->getType() != Ty) 253dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 254369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 255369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Ty; 256369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 257369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 258369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns True if the ExtractElement instructions in VL can be vectorized 259369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// to use the original vector. 260369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic bool CanReuseExtract(ArrayRef<Value *> VL) { 261369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem assert(Instruction::ExtractElement == getSameOpcode(VL) && "Invalid opcode"); 262369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check if all of the extracts come from the same vector and from the 263369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // correct offset. 264369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *VL0 = VL[0]; 265369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ExtractElementInst *E0 = cast<ExtractElementInst>(VL0); 266369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *Vec = E0->getOperand(0); 267369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 268369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // We have to extract from the same vector type. 
269369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned NElts = Vec->getType()->getVectorNumElements(); 270369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 271369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (NElts != VL.size()) 272369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return false; 273369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 274369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check that all of the indices extract from the correct offset. 275369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ConstantInt *CI = dyn_cast<ConstantInt>(E0->getOperand(1)); 276369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!CI || CI->getZExtValue()) 277369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return false; 278369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 279369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 1, e = VL.size(); i < e; ++i) { 280369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ExtractElementInst *E = cast<ExtractElementInst>(VL[i]); 281369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1)); 282369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 283369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!CI || CI->getZExtValue() != i || E->getOperand(0) != Vec) 284369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return false; 285369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 286369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 287369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return true; 288369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 289369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 29037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// \returns True if in-tree use also needs extract. This refers to 29137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// possible scalar operand in vectorized instruction. 
29237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, 29337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TargetLibraryInfo *TLI) { 29437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 29537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Opcode = UserInst->getOpcode(); 29637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines switch (Opcode) { 29737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case Instruction::Load: { 29837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LoadInst *LI = cast<LoadInst>(UserInst); 29937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return (LI->getPointerOperand() == Scalar); 30037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 30137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case Instruction::Store: { 30237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines StoreInst *SI = cast<StoreInst>(UserInst); 30337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return (SI->getPointerOperand() == Scalar); 30437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 30537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case Instruction::Call: { 30637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines CallInst *CI = cast<CallInst>(UserInst); 30737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); 30837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (hasVectorInstrinsicScalarOpd(ID, 1)) { 30937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return (CI->getArgOperand(1) == Scalar); 31037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 31137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 31237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines default: 31337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 31437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 31537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 
31637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 317ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \returns the AA location that is being access by the instruction. 318ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) { 319ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (StoreInst *SI = dyn_cast<StoreInst>(I)) 320ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return AA->getLocation(SI); 321ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *LI = dyn_cast<LoadInst>(I)) 322ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return AA->getLocation(LI); 323ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return AliasAnalysis::Location(); 324ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 325ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 326ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \returns True if the instruction is not a volatile or atomic load/store. 327ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic bool isSimple(Instruction *I) { 328ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *LI = dyn_cast<LoadInst>(I)) 329ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return LI->isSimple(); 330ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (StoreInst *SI = dyn_cast<StoreInst>(I)) 331ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return SI->isSimple(); 332ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) 333ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return !MI->isVolatile(); 334ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return true; 335ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 336ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 337369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// Bottom Up SLP Vectorizer. 
338369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemclass BoUpSLP { 339369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotempublic: 34053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem typedef SmallVector<Value *, 8> ValueList; 34153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem typedef SmallVector<Instruction *, 16> InstrList; 34253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem typedef SmallPtrSet<Value *, 16> ValueSet; 34353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem typedef SmallVector<StoreInst *, 8> StoreList; 34453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 3454c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, 3464c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li, 3474c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar DominatorTree *Dt, AssumptionCache *AC) 348ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func), 3494c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), 35037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Builder(Se->getContext()) { 351ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines CodeMetrics::collectEphemeralValues(F, AC, EphValues); 35237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 35353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 35453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// \brief Vectorize the tree that starts with the elements in \p VL. 355a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// Returns the vectorized root. 
356a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *vectorizeTree(); 35753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 35837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// \returns the cost incurred by unwanted spills and fills, caused by 35937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// holding live values over call sites. 36037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int getSpillCost(); 36137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 36253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// \returns the vectorization cost of the subtree that starts at \p VL. 36353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// A negative number means that this is profitable. 364369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int getTreeCost(); 365369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 366dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// Construct a vectorizable tree that starts at \p Roots, ignoring users for 367dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// the purpose of scheduling and extraction in the \p UserIgnoreLst. 368dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void buildTree(ArrayRef<Value *> Roots, 369dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ArrayRef<Value *> UserIgnoreLst = None); 370369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 371369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// Clear the internal data structures that are created by 'buildTree'. 
372369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem void deleteTree() { 373369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorizableTree.clear(); 374369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ScalarToTreeEntry.clear(); 375369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem MustGather.clear(); 376a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem ExternalUses.clear(); 37737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NumLoadsWantToKeepOrder = 0; 37837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NumLoadsWantToChangeOrder = 0; 37937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto &Iter : BlocksSchedules) { 38037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BlockScheduling *BS = Iter.second.get(); 38137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->clear(); 38237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 383369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 38453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 385369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// \returns true if the memory operations A and B are consecutive. 3864c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL); 387369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 388369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// \brief Perform LICM and CSE on the newly generated gather sequences. 389369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem void optimizeGatherSequence(); 390c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 39137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// \returns true if it is benefitial to reverse the vector order. 
39237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool shouldReorder() const { 39337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return NumLoadsWantToChangeOrder > NumLoadsWantToKeepOrder; 39437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 39537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 396369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemprivate: 397369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem struct TreeEntry; 39853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 399369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// \returns the cost of the vectorizable entry. 400369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int getEntryCost(TreeEntry *E); 40153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 402369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// This is the recursive part of buildTree. 403369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth); 40453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 40562657090de3a5731bf644437701ccd78c247119fNadav Rotem /// Vectorize a single entry in the tree. 406369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *vectorizeTree(TreeEntry *E); 407369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 40862657090de3a5731bf644437701ccd78c247119fNadav Rotem /// Vectorize a single entry in the tree, starting in \p VL. 409369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *vectorizeTree(ArrayRef<Value *> VL); 41053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 41162657090de3a5731bf644437701ccd78c247119fNadav Rotem /// \returns the pointer to the vectorized value if \p VL is already 41262657090de3a5731bf644437701ccd78c247119fNadav Rotem /// vectorized, or NULL. They may happen in cycles. 
4136a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault Value *alreadyVectorized(ArrayRef<Value *> VL) const; 41462657090de3a5731bf644437701ccd78c247119fNadav Rotem 415369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// \brief Take the pointer operand from the Load/Store instruction. 416369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// \returns NULL if this is not a valid Load/Store instruction. 417369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem static Value *getPointerOperand(Value *I); 41853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 419369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// \brief Take the address space operand from the Load/Store instruction. 420369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// \returns -1 if this is not a valid Load/Store instruction. 421369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem static unsigned getAddressSpaceOperand(Value *I); 42253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 42353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// \returns the scalarization cost for this type. Scalarization in this 42453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// context means the creation of vectors from a group of scalars. 42553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem int getGatherCost(Type *Ty); 42653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 427d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem /// \returns the scalarization cost for this list of values. Assuming that 428d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem /// this subtree gets vectorized, we may need to extract the values from the 429d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem /// roots. This method calculates the cost of extracting the values. 
430d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem int getGatherCost(ArrayRef<Value *> VL); 431d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem 4324b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault /// \brief Set the Builder insert point to one after the last instruction in 4334b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault /// the bundle 4344b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault void setInsertPointAfterBundle(ArrayRef<Value *> VL); 4354b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault 43653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// \returns a vector from a collection of scalars in \p VL. 43753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Value *Gather(ArrayRef<Value *> VL, VectorType *Ty); 43853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 439d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang /// \returns whether the VectorizableTree is fully vectoriable and will 440d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang /// be beneficial even the tree height is tiny. 44136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool isFullyVectorizableTinyTree(); 442d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang 443ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// \reorder commutative operands in alt shuffle if they result in 444ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// vectorized code. 445ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines void reorderAltShuffleOperands(ArrayRef<Value *> VL, 446ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Left, 447ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Right); 448ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// \reorder commutative operands to get better probability of 449ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// generating vectorized code. 
450ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL, 451ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Left, 452ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Right); 453369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem struct TreeEntry { 45437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TreeEntry() : Scalars(), VectorizedValue(nullptr), 455369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem NeedToGather(0) {} 45653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 457369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// \returns true if the scalars in VL are equal to this entry. 4586a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault bool isSame(ArrayRef<Value *> VL) const { 459369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem assert(VL.size() == Scalars.size() && "Invalid size"); 4606623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer return std::equal(VL.begin(), VL.end(), Scalars.begin()); 461369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 462369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 463369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// A vector of scalars. 464369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Scalars; 465369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 466369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// The Scalars are vectorized into this value. It is initialized to Null. 467369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *VectorizedValue; 468369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 469369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// Do we need to gather this sequence ? 
470369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool NeedToGather; 471369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem }; 47253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 473369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// Create a new VectorizableTree entry. 474369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized) { 475369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorizableTree.push_back(TreeEntry()); 476369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int idx = VectorizableTree.size() - 1; 477369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TreeEntry *Last = &VectorizableTree[idx]; 478369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end()); 479369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Last->NeedToGather = !Vectorized; 480369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Vectorized) { 481369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 0, e = VL.size(); i != e; ++i) { 482369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem assert(!ScalarToTreeEntry.count(VL[i]) && "Scalar already in tree!"); 483369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ScalarToTreeEntry[VL[i]] = idx; 484369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 485369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } else { 486369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem MustGather.insert(VL.begin(), VL.end()); 487369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 488369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Last; 489ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem } 49037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 49153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// -- Vectorization State -- 492369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// Holds all of the tree entries. 
493369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem std::vector<TreeEntry> VectorizableTree; 49453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 495369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// Maps a specific scalar to its tree entry. 496369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem SmallDenseMap<Value*, int> ScalarToTreeEntry; 49753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 498369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// A list of scalars that we found that we need to keep as scalars. 49953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem ValueSet MustGather; 50053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 501a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem /// This POD struct describes one external user in the vectorized tree. 502a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem struct ExternalUser { 503a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem ExternalUser (Value *S, llvm::User *U, int L) : 504a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem Scalar(S), User(U), Lane(L){}; 505a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Which scalar in our function. 506a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem Value *Scalar; 507a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Which user that uses the scalar. 508a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem llvm::User *User; 509a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Which lane does the scalar belong to. 510a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem int Lane; 511a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem }; 512a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem typedef SmallVector<ExternalUser, 16> UserList; 513a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 514ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// Checks if two instructions may access the same memory. 
515ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// 516ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// \p Loc1 is the location of \p Inst1. It is passed explicitly because it 517ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// is invariant in the calling loop. 518ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool isAliased(const AliasAnalysis::Location &Loc1, Instruction *Inst1, 519ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Instruction *Inst2) { 520ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 521ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // First check if the result is already in the cache. 522ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AliasCacheKey key = std::make_pair(Inst1, Inst2); 523ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Optional<bool> &result = AliasCache[key]; 524ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (result.hasValue()) { 525ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return result.getValue(); 526ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 527ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AliasAnalysis::Location Loc2 = getLocation(Inst2, AA); 528ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool aliased = true; 529ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (Loc1.Ptr && Loc2.Ptr && isSimple(Inst1) && isSimple(Inst2)) { 530ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Do the alias check. 531ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines aliased = AA->alias(Loc1, Loc2); 532ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 533ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Store the result in the cache. 
534ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines result = aliased; 535ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return aliased; 536ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 537ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 538ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines typedef std::pair<Instruction *, Instruction *> AliasCacheKey; 539ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 540ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// Cache for alias results. 541ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// TODO: consider moving this to the AliasAnalysis itself. 542ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines DenseMap<AliasCacheKey, Optional<bool>> AliasCache; 543ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 544ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// Removes an instruction from its block and eventually deletes it. 545ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// It's like Instruction::eraseFromParent() except that the actual deletion 546ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// is delayed until BoUpSLP is destructed. 547ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// This is required to ensure that there are no incorrect collisions in the 548ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// AliasCache, which can happen if a new instruction is allocated at the 549ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// same address as a previously deleted instruction. 
550ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines void eraseInstruction(Instruction *I) { 551ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines I->removeFromParent(); 552ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines I->dropAllReferences(); 553ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines DeletedInstructions.push_back(std::unique_ptr<Instruction>(I)); 554ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 555ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 556ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// Temporary store for deleted instructions. Instructions will be deleted 557ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// eventually when the BoUpSLP is destructed. 558ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVector<std::unique_ptr<Instruction>, 8> DeletedInstructions; 559ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 560a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem /// A list of values that need to extracted out of the tree. 561a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem /// This list holds pairs of (Internal Scalar : External User). 562a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem UserList ExternalUses; 563a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 56437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Values used only by @llvm.assume calls. 56537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallPtrSet<const Value *, 32> EphValues; 56653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 56753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// Holds all of the instructions that we gathered. 56853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem SetVector<Instruction *> GatherSeq; 569a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling /// A list of blocks that we are going to CSE. 
57036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SetVector<BasicBlock *> CSEBlocks; 57153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 57237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Contains all scheduling relevant data for an instruction. 57337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// A ScheduleData either represents a single instruction or a member of an 57437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// instruction bundle (= a group of instructions which is combined into a 57537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// vector instruction). 57637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines struct ScheduleData { 57737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 57837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The initial value for the dependency counters. It means that the 57937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // dependencies are not calculated yet. 58037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines enum { InvalidDeps = -1 }; 58137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 58237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData() 58337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines : Inst(nullptr), FirstInBundle(nullptr), NextInBundle(nullptr), 58437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NextLoadStore(nullptr), SchedulingRegionID(0), SchedulingPriority(0), 58537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Dependencies(InvalidDeps), UnscheduledDeps(InvalidDeps), 58637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines UnscheduledDepsInBundle(InvalidDeps), IsScheduled(false) {} 58737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 58837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void init(int BlockSchedulingRegionID) { 58937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines FirstInBundle = this; 59037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NextInBundle = nullptr; 
59137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NextLoadStore = nullptr; 59237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines IsScheduled = false; 59337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SchedulingRegionID = BlockSchedulingRegionID; 59437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines UnscheduledDepsInBundle = UnscheduledDeps; 59537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines clearDependencies(); 59637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 59737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 59837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Returns true if the dependency information has been calculated. 59937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool hasValidDependencies() const { return Dependencies != InvalidDeps; } 60037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 60137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Returns true for single instructions and for bundle representatives 60237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// (= the head of a bundle). 60337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool isSchedulingEntity() const { return FirstInBundle == this; } 60437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 60537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Returns true if it represents an instruction bundle and not only a 60637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// single instruction. 60737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool isPartOfBundle() const { 60837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return NextInBundle != nullptr || FirstInBundle != this; 60937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 61037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 61137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Returns true if it is ready for scheduling, i.e. 
it has no more 61237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// unscheduled depending instructions/bundles. 61337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool isReady() const { 61437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(isSchedulingEntity() && 61537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "can't consider non-scheduling entity for ready list"); 61637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return UnscheduledDepsInBundle == 0 && !IsScheduled; 61737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 61837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 61937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Modifies the number of unscheduled dependencies, also updating it for 62037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// the whole bundle. 62137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int incrementUnscheduledDeps(int Incr) { 62237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines UnscheduledDeps += Incr; 62337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return FirstInBundle->UnscheduledDepsInBundle += Incr; 62437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 62537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 62637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Sets the number of unscheduled dependencies to the number of 62737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// dependencies. 62837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void resetUnscheduledDeps() { 62937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines incrementUnscheduledDeps(Dependencies - UnscheduledDeps); 63037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 63137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 63237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Clears all dependency information. 
63337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void clearDependencies() { 63437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Dependencies = InvalidDeps; 63537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines resetUnscheduledDeps(); 63637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MemoryDependencies.clear(); 63737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 63837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 63937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void dump(raw_ostream &os) const { 64037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!isSchedulingEntity()) { 64137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines os << "/ " << *Inst; 64237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else if (NextInBundle) { 64337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines os << '[' << *Inst; 64437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = NextInBundle; 64537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (SD) { 64637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines os << ';' << *SD->Inst; 64737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD = SD->NextInBundle; 64837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 64937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines os << ']'; 65037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 65137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines os << *Inst; 65237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 65337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 65453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 65537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *Inst; 65637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 65737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Points to the head in an instruction bundle (and always to this for 65837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// single instructions). 
65937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *FirstInBundle; 66037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 66137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Single linked list of all instructions in a bundle. Null if it is a 66237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// single instruction. 66337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *NextInBundle; 66437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 66537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Single linked list of all memory instructions (e.g. load, store, call) 66637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// in the block - until the end of the scheduling region. 66737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *NextLoadStore; 66837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 66937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The dependent memory instructions. 67037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// This list is derived on demand in calculateDependencies(). 67137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallVector<ScheduleData *, 4> MemoryDependencies; 67237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 67337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// This ScheduleData is in the current scheduling region if this matches 67437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// the current SchedulingRegionID of BlockScheduling. 67537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int SchedulingRegionID; 67637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 67737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Used for getting a "good" final ordering of instructions. 67837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int SchedulingPriority; 67937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 68037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The number of dependencies. 
Constitutes of the number of users of the 68137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// instruction plus the number of dependent memory instructions (if any). 68237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// This value is calculated on demand. 68337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// If InvalidDeps, the number of dependencies is not calculated yet. 68437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// 68537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Dependencies; 68637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 68737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The number of dependencies minus the number of dependencies of scheduled 68837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// instructions. As soon as this is zero, the instruction/bundle gets ready 68937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// for scheduling. 69037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Note that this is negative as long as Dependencies is not calculated. 69137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int UnscheduledDeps; 69237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 69337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for 69437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// single instructions. 69537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int UnscheduledDepsInBundle; 69637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 69737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// True if this instruction is scheduled (or considered as scheduled in the 69837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// dry-run). 
69937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool IsScheduled; 70037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines }; 70137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 70237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#ifndef NDEBUG 70337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines friend raw_ostream &operator<<(raw_ostream &os, 70437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const BoUpSLP::ScheduleData &SD); 70537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#endif 70637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 70737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Contains all scheduling data for a basic block. 70837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// 70937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines struct BlockScheduling { 71037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 71137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BlockScheduling(BasicBlock *BB) 71237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize), 71337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleStart(nullptr), ScheduleEnd(nullptr), 71437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr), 71537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Make sure that the initial SchedulingRegionID is greater than the 71637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // initial SchedulingRegionID in ScheduleData (which is 0). 
71737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SchedulingRegionID(1) {} 71837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 71937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void clear() { 72037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.clear(); 72137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleStart = nullptr; 72237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleEnd = nullptr; 72337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines FirstLoadStoreInRegion = nullptr; 72437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LastLoadStoreInRegion = nullptr; 72537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 72637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Make a new scheduling region, i.e. all existing ScheduleData is not 72737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // in the new region yet. 72837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++SchedulingRegionID; 72937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 73037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 73137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *getScheduleData(Value *V) { 73237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = ScheduleDataMap[V]; 73337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (SD && SD->SchedulingRegionID == SchedulingRegionID) 73437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return SD; 73537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return nullptr; 73637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 73737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 73837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool isInSchedulingRegion(ScheduleData *SD) { 73937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return SD->SchedulingRegionID == SchedulingRegionID; 74037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 74137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 
74237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Marks an instruction as scheduled and puts all dependent ready 74337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// instructions into the ready-list. 74437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines template <typename ReadyListType> 74537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void schedule(ScheduleData *SD, ReadyListType &ReadyList) { 74637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->IsScheduled = true; 74737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: schedule " << *SD << "\n"); 74837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 74937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *BundleMember = SD; 75037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (BundleMember) { 75137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Handle the def-use chain dependencies. 75237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (Use &U : BundleMember->Inst->operands()) { 75337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *OpDef = getScheduleData(U.get()); 75437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (OpDef && OpDef->hasValidDependencies() && 75537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines OpDef->incrementUnscheduledDeps(-1) == 0) { 75637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // There are no more unscheduled dependencies after decrementing, 75737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // so we can put the dependent instruction into the ready list. 
75837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *DepBundle = OpDef->FirstInBundle; 75937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!DepBundle->IsScheduled && 76037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "already scheduled bundle gets ready"); 76137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyList.insert(DepBundle); 76237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: gets ready (def): " << *DepBundle << "\n"); 76337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 76437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 76537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Handle the memory dependencies. 76637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) { 76737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (MemoryDepSD->incrementUnscheduledDeps(-1) == 0) { 76837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // There are no more unscheduled dependencies after decrementing, 76937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // so we can put the dependent instruction into the ready list. 
77037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *DepBundle = MemoryDepSD->FirstInBundle; 77137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!DepBundle->IsScheduled && 77237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "already scheduled bundle gets ready"); 77337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyList.insert(DepBundle); 77437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: gets ready (mem): " << *DepBundle << "\n"); 77537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 77637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 77737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember = BundleMember->NextInBundle; 77837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 77937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 78037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 78137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Put all instructions into the ReadyList which are ready for scheduling. 
78237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines template <typename ReadyListType> 78337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void initialFillReadyList(ReadyListType &ReadyList) { 78437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) { 78537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = getScheduleData(I); 78637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (SD->isSchedulingEntity() && SD->isReady()) { 78737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyList.insert(SD); 78837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: initially in ready list: " << *I << "\n"); 78937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 79037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 79137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 79237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 79337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Checks if a bundle of instructions can be scheduled, i.e. has no 79437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// cyclic dependencies. This is only a dry-run, no instructions are 79537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// actually moved at this stage. 796ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP); 79737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 79837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Un-bundles a group of instructions. 79937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void cancelScheduling(ArrayRef<Value *> VL); 80037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 80137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Extends the scheduling region so that V is inside the region. 
80237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void extendSchedulingRegion(Value *V); 80337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 80437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Initialize the ScheduleData structures for new instructions in the 80537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// scheduling region. 80637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void initScheduleData(Instruction *FromI, Instruction *ToI, 80737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *PrevLoadStore, 80837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *NextLoadStore); 80937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 81037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Updates the dependency information of a bundle and of all instructions/ 81137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// bundles which depend on the original bundle. 81237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void calculateDependencies(ScheduleData *SD, bool InsertInReadyList, 813ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BoUpSLP *SLP); 81437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 81537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Sets all instruction in the scheduling region to un-scheduled. 81637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void resetSchedule(); 81737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 81837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BasicBlock *BB; 81937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 82037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Simple memory allocation for ScheduleData. 82137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines std::vector<std::unique_ptr<ScheduleData[]>> ScheduleDataChunks; 82237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 82337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The size of a ScheduleData array in ScheduleDataChunks. 
82437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int ChunkSize; 82537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 82637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The allocator position in the current chunk, which is the last entry 82737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// of ScheduleDataChunks. 82837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int ChunkPos; 82937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 83037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Attaches ScheduleData to Instruction. 83137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Note that the mapping survives during all vectorization iterations, i.e. 83237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// ScheduleData structures are recycled. 83337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DenseMap<Value *, ScheduleData *> ScheduleDataMap; 83437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 83537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines struct ReadyList : SmallVector<ScheduleData *, 8> { 83637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void insert(ScheduleData *SD) { push_back(SD); } 83737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines }; 83837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 83937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The ready-list for scheduling (only used for the dry-run). 84037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyList ReadyInsts; 84137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 84237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The first instruction of the scheduling region. 84337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *ScheduleStart; 84437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 84537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The first instruction _after_ the scheduling region. 
84637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *ScheduleEnd; 84737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 84837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The first memory accessing instruction in the scheduling region 84937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// (can be null). 85037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *FirstLoadStoreInRegion; 85137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 85237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The last memory accessing instruction in the scheduling region 85337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// (can be null). 85437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *LastLoadStoreInRegion; 85537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 85637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The ID of the scheduling region. For a new vectorization iteration this 85737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// is incremented which "removes" all ScheduleData from the region. 85837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int SchedulingRegionID; 85937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines }; 86037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 86137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Attaches the BlockScheduling structures to basic blocks. 862ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines MapVector<BasicBlock *, std::unique_ptr<BlockScheduling>> BlocksSchedules; 86337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 86437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Performs the "real" scheduling. Done before vectorization is actually 86537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// performed in a basic block. 
86637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void scheduleBlock(BlockScheduling *BS); 867dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 868dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// List of users to ignore during scheduling and that don't need extracting. 869dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ArrayRef<Value *> UserIgnoreList; 870a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 87137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Number of load-bundles, which contain consecutive loads. 87237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int NumLoadsWantToKeepOrder; 87337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 87437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Number of load-bundles of size 2, which are consecutive loads if reversed. 87537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int NumLoadsWantToChangeOrder; 87637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 87753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // Analysis and block reference. 87853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Function *F; 87953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem ScalarEvolution *SE; 88053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem TargetTransformInfo *TTI; 881dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines TargetLibraryInfo *TLI; 88253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem AliasAnalysis *AA; 88353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem LoopInfo *LI; 884722b0a4d293b16eebaed94ae65d5f11743cbcea5Nadav Rotem DominatorTree *DT; 88553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// Instruction builder to construct the vectorized tree. 
88653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem IRBuilder<> Builder; 88753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}; 88853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 88937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#ifndef NDEBUG 89037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesraw_ostream &operator<<(raw_ostream &os, const BoUpSLP::ScheduleData &SD) { 89137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD.dump(os); 89237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return os; 89337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 89437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#endif 89537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 896dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesvoid BoUpSLP::buildTree(ArrayRef<Value *> Roots, 897dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ArrayRef<Value *> UserIgnoreLst) { 898369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem deleteTree(); 899dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines UserIgnoreList = UserIgnoreLst; 90030bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem if (!getSameType(Roots)) 90130bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem return; 902369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Roots, 0); 903a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 904a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Collect the values that we need to extract from the tree. 
905a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem for (int EIdx = 0, EE = VectorizableTree.size(); EIdx < EE; ++EIdx) { 906a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem TreeEntry *Entry = &VectorizableTree[EIdx]; 907a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 908a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // For each lane: 909a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { 910a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem Value *Scalar = Entry->Scalars[Lane]; 911a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 912a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // No need to handle users of gathered values. 913a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem if (Entry->NeedToGather) 914a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem continue; 915a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 91636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (User *U : Scalar->users()) { 91736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n"); 918a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 91936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Instruction *UserInst = dyn_cast<Instruction>(U); 920a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (!UserInst) 921a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer continue; 922a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 92337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Skip in-tree scalars that become vectors 92437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ScalarToTreeEntry.count(U)) { 92537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Idx = ScalarToTreeEntry[U]; 92637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TreeEntry *UseEntry = &VectorizableTree[Idx]; 92737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Value *UseScalar = UseEntry->Scalars[0]; 
92837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Some in-tree scalars will remain as scalar in vectorized 92937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // instructions. If that is the case, the one in Lane 0 will 93037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // be used. 93137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (UseScalar != U || 93237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) { 93337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U 93437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines << ".\n"); 93537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!VectorizableTree[Idx].NeedToGather && "Bad state"); 93637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 93737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 93837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 93937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 940dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Ignore users in the user ignore list. 
941dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) != 942dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines UserIgnoreList.end()) 943a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem continue; 944a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 94536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " << 946a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem Lane << " from " << *Scalar << ".\n"); 94736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ExternalUses.push_back(ExternalUser(Scalar, U, Lane)); 948a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 949a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 950a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 95153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem} 95253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 95353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 954369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemvoid BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { 955369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool SameTy = getSameType(VL); (void)SameTy; 956c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines bool isAltShuffle = false; 957369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem assert(SameTy && "Invalid types!"); 95853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 959369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Depth == RecursionMaxDepth) { 960369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); 961369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 962369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 963369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 96453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 965369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Don't handle vectors. 
966369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (VL[0]->getType()->isVectorTy()) { 967369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); 968369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 969369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 970369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 97153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 972369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 973369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (SI->getValueOperand()->getType()->isVectorTy()) { 974369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); 975369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 976369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 977369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 978c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned Opcode = getSameOpcode(VL); 979c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 980c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Check that this shuffle vector refers to the alternate 981c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // sequence of opcodes. 
982c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Opcode == Instruction::ShuffleVector) { 983c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *I0 = dyn_cast<Instruction>(VL[0]); 984c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned Op = I0->getOpcode(); 985c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Op != Instruction::ShuffleVector) 986c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines isAltShuffle = true; 987c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 98853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 989369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // If all of the operands are identical or constant we have a simple solution. 990c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) { 991369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); 992369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 993369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 994369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 99553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 996369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // We now know that this is a vector of instructions of the same type from 997369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // the same block. 998369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 99937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Don't vectorize ephemeral values. 
100037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned i = 0, e = VL.size(); i != e; ++i) { 100137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (EphValues.count(VL[i])) { 100237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << 100337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ") is ephemeral.\n"); 100437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines newTreeEntry(VL, false); 100537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 100637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 100737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 100837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1009369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check if this is a duplicate of another entry. 1010369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (ScalarToTreeEntry.count(VL[0])) { 1011369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int Idx = ScalarToTreeEntry[VL[0]]; 1012369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TreeEntry *E = &VectorizableTree[Idx]; 1013369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL.size(); i != e; ++i) { 1014369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n"); 1015369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (E->Scalars[i] != VL[i]) { 1016369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); 1017369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1018369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1019369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1020369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1021369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *VL[0] << ".\n"); 1022369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 
1023369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 102453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1025369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check that none of the instructions in the bundle are already in the tree. 1026369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL.size(); i != e; ++i) { 1027369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (ScalarToTreeEntry.count(VL[i])) { 1028369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << 1029369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ") is already in tree.\n"); 1030369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1031369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1032369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1033369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 103453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1035ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // If any of the scalars is marked as a value that needs to stay scalar then 1036ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // we need to gather the scalars. 
1037369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL.size(); i != e; ++i) { 1038ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (MustGather.count(VL[i])) { 1039ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); 1040369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1041369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1042369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1043369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 104453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1045369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check that all of the users of the scalars that we want to vectorize are 1046369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // schedulable. 1047369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Instruction *VL0 = cast<Instruction>(VL[0]); 1048369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BasicBlock *BB = cast<Instruction>(VL0)->getParent(); 104953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 105037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!DT->isReachableFromEntry(BB)) { 105137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Don't go into unreachable blocks. They may contain instructions with 105237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // dependency cycles which confuse the final scheduling. 105337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); 105437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines newTreeEntry(VL, false); 105537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 105653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 105737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1058369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check that every instructions appears once in this bundle. 
105953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem for (unsigned i = 0, e = VL.size(); i < e; ++i) 1060369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned j = i+1; j < e; ++j) 1061369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (VL[i] == VL[j]) { 1062369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); 1063369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1064369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1065369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 106653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 106737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines auto &BSRef = BlocksSchedules[BB]; 106837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!BSRef) { 106937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BSRef = llvm::make_unique<BlockScheduling>(BB); 107053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 107137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BlockScheduling &BS = *BSRef.get(); 107253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1073ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!BS.tryScheduleBundle(VL, this)) { 107437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); 107537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 107637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines newTreeEntry(VL, false); 107737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 107853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 107937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); 108053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1081369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem switch (Opcode) { 1082369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PHI: { 
1083369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem PHINode *PH = dyn_cast<PHINode>(VL0); 10843c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer 10853c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer // Check for terminator values (e.g. invoke). 10863c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer for (unsigned j = 0; j < VL.size(); ++j) 10873c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { 108836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TerminatorInst *Term = dyn_cast<TerminatorInst>( 108936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i))); 10903c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer if (Term) { 10913c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n"); 109237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 10933c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer newTreeEntry(VL, false); 10943c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer return; 10953c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer } 10963c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer } 10973c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer 1098369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1099369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); 1100369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1101369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { 1102369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Operands; 1103369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Prepare the operand vector. 
1104369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned j = 0; j < VL.size(); ++j) 110536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Operands.push_back(cast<PHINode>(VL[j])->getIncomingValueForBlock( 110636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines PH->getIncomingBlock(i))); 1107369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1108369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Operands, Depth + 1); 1109369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1110369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1111369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1112369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ExtractElement: { 1113369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool Reuse = CanReuseExtract(VL); 1114369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Reuse) { 1115369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Reusing extract sequence.\n"); 111637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 111737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1118369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1119369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, Reuse); 1120369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1121369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1122369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Load: { 1123369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check if the loads are consecutive or of we need to swizzle them. 
1124fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) { 1125fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer LoadInst *L = cast<LoadInst>(VL[i]); 112637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!L->isSimple()) { 112737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1128369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 112937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); 113037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 113137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 11324c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = F->getParent()->getDataLayout(); 11334c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) { 11344c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) { 113537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumLoadsWantToChangeOrder; 113637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 113737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 113837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines newTreeEntry(VL, false); 113937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n"); 1140369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1141369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1142fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer } 114337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumLoadsWantToKeepOrder; 1144369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1145369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of loads.\n"); 
1146369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1147369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1148369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ZExt: 1149369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SExt: 1150369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToUI: 1151369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToSI: 1152369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPExt: 1153369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PtrToInt: 1154369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::IntToPtr: 1155369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SIToFP: 1156369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UIToFP: 1157369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Trunc: 1158369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPTrunc: 1159369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::BitCast: { 1160369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *SrcTy = VL0->getOperand(0)->getType(); 1161369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0; i < VL.size(); ++i) { 1162369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType(); 1163ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (Ty != SrcTy || !isValidElementType(Ty)) { 116437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1165369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1166369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); 1167369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1168369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1169369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 
1170369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1171369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of casts.\n"); 117253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1173369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 1174369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Operands; 1175369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Prepare the operand vector. 1176369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned j = 0; j < VL.size(); ++j) 1177369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); 117853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1179369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Operands, Depth+1); 1180369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 118153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem return; 1182369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1183369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ICmp: 1184369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FCmp: { 1185369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check that all of the compares have the same predicate. 
11860c7f116bb6950ef819323d855415b2f2b0aad987Pirama Arumuga Nainar CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate(); 1187135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType(); 1188369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 1, e = VL.size(); i < e; ++i) { 1189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem CmpInst *Cmp = cast<CmpInst>(VL[i]); 1190135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem if (Cmp->getPredicate() != P0 || 1191135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem Cmp->getOperand(0)->getType() != ComparedTy) { 119237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1193369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1194369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); 1195369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1196369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1197369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 119853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1199369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1200369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of compares.\n"); 120153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1202369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 1203369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Operands; 1204369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Prepare the operand vector. 
1205369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned j = 0; j < VL.size(); ++j) 1206369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); 120753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1208369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Operands, Depth+1); 1209805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem } 1210369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 121153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1212369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Select: 1213369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Add: 1214369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FAdd: 1215369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Sub: 1216369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FSub: 1217369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Mul: 1218369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FMul: 1219369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UDiv: 1220369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SDiv: 1221369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FDiv: 1222369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::URem: 1223369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SRem: 1224369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FRem: 1225369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Shl: 1226369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::LShr: 1227369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::AShr: 1228369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::And: 1229369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Or: 
1230369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Xor: { 1231369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1232369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of bin op.\n"); 1233369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1234af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer // Sort operands of the instructions so that each side is more likely to 1235af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer // have the same opcode. 1236af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) { 1237af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer ValueList Left, Right; 1238af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer reorderInputsAccordingToOpcode(VL, Left, Right); 123937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines buildTree_rec(Left, Depth + 1); 124037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines buildTree_rec(Right, Depth + 1); 1241af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer return; 1242af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer } 1243af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer 1244369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 1245369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Operands; 1246369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Prepare the operand vector. 
1247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned j = 0; j < VL.size(); ++j) 1248369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); 1249369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1250369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Operands, Depth+1); 1251369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1252369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 125353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1254c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::GetElementPtr: { 1255c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // We don't combine GEPs with complicated (nested) indexing. 1256c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned j = 0; j < VL.size(); ++j) { 1257c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (cast<Instruction>(VL[j])->getNumOperands() != 2) { 1258c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); 125937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1260c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, false); 1261c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1262c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1263c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1264c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1265c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // We can't combine several GEPs into one vector if they operate on 1266c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // different types. 
1267c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType(); 1268c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned j = 0; j < VL.size(); ++j) { 1269c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType(); 1270c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Ty0 != CurTy) { 1271c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); 127237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1273c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, false); 1274c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1275c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1276c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1277c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1278c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // We don't combine GEPs with non-constant indexes. 
1279c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned j = 0; j < VL.size(); ++j) { 1280c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines auto Op = cast<Instruction>(VL[j])->getOperand(1); 1281c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (!isa<ConstantInt>(Op)) { 1282c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG( 1283c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); 128437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1285c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, false); 1286c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1287c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1288c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1289c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1290c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, true); 1291c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: added a vector of GEPs.\n"); 1292c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned i = 0, e = 2; i < e; ++i) { 1293c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ValueList Operands; 1294c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Prepare the operand vector. 
1295c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned j = 0; j < VL.size(); ++j) 1296c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); 1297c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1298c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines buildTree_rec(Operands, Depth + 1); 1299c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1300c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1301c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1302369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Store: { 13034c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = F->getParent()->getDataLayout(); 1304369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check if the stores are consecutive or if we need to swizzle them. 1305369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) 13064c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) { 130737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1308369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 130936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); 1310369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1311369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1312805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 1313369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1314369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of stores.\n"); 1315805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 1316805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem ValueList Operands; 1317805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem for (unsigned j = 0; j < VL.size(); ++j) 
1318369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); 1319805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 1320369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Operands, Depth + 1); 132153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem return; 132253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 132336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case Instruction::Call: { 132436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Check if the calls are all to the same vectorizable intrinsic. 1325dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CallInst *CI = cast<CallInst>(VL[0]); 1326dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Check if this is an Intrinsic call or something that can be 1327dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // represented by an intrinsic call 1328dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); 1329dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!isTriviallyVectorizable(ID)) { 133037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 133136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines newTreeEntry(VL, false); 133236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); 133336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return; 133436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1335dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Function *Int = CI->getCalledFunction(); 1336c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *A1I = nullptr; 1337c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (hasVectorInstrinsicScalarOpd(ID, 1)) 1338c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines A1I = CI->getArgOperand(1); 133936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned i = 1, e = VL.size(); i != e; ++i) { 
1340dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CallInst *CI2 = dyn_cast<CallInst>(VL[i]); 1341dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!CI2 || CI2->getCalledFunction() != Int || 1342dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines getIntrinsicIDForCall(CI2, TLI) != ID) { 134337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 134436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines newTreeEntry(VL, false); 1345dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] 134636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines << "\n"); 134736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return; 134836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1349c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // ctlz,cttz and powi are special intrinsics whose second argument 1350c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // should be same in order for them to be vectorized. 
1351c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (hasVectorInstrinsicScalarOpd(ID, 1)) { 1352c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *A1J = CI2->getArgOperand(1); 1353c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (A1I != A1J) { 135437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1355c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, false); 1356c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI 1357c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines << " argument "<< A1I<<"!=" << A1J 1358c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines << "\n"); 1359c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1360c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1361c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 136236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 136336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 136436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines newTreeEntry(VL, true); 1365dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) { 136636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ValueList Operands; 136736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Prepare the operand vector. 
136836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned j = 0; j < VL.size(); ++j) { 1369dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CallInst *CI2 = dyn_cast<CallInst>(VL[j]); 1370dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Operands.push_back(CI2->getArgOperand(i)); 137136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 137236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines buildTree_rec(Operands, Depth + 1); 137336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 137436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return; 137536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1376c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::ShuffleVector: { 1377c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // If this is not an alternate sequence of opcode like add-sub 1378c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // then do not vectorize this instruction. 1379c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (!isAltShuffle) { 138037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1381c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, false); 1382c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); 1383c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1384c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1385c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, true); 1386c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); 1387ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1388ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Reorder operands if reordering would enable vectorization. 
1389ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (isa<BinaryOperator>(VL0)) { 1390ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines ValueList Left, Right; 1391ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines reorderAltShuffleOperands(VL, Left, Right); 1392ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines buildTree_rec(Left, Depth + 1); 1393ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines buildTree_rec(Right, Depth + 1); 1394ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return; 1395ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1396ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1397c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 1398c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ValueList Operands; 1399c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Prepare the operand vector. 1400c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned j = 0; j < VL.size(); ++j) 1401c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); 1402c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1403c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines buildTree_rec(Operands, Depth + 1); 1404c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1405c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1406c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1407369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem default: 140837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1409369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1410369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); 1411369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 141253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 141353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav 
Rotem} 141453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1415369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getEntryCost(TreeEntry *E) { 1416369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ArrayRef<Value*> VL = E->Scalars; 141753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 141853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Type *ScalarTy = VL[0]->getType(); 141953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 142053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem ScalarTy = SI->getValueOperand()->getType(); 142125961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); 142225961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem 1423369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (E->NeedToGather) { 1424369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (allConstant(VL)) 1425369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return 0; 1426369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (isSplat(VL)) { 1427369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); 142853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1429369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return getGatherCost(E->Scalars); 143053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1431c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned Opcode = getSameOpcode(VL); 1432c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL"); 143353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Instruction *VL0 = cast<Instruction>(VL[0]); 143453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem switch (Opcode) { 1435369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PHI: { 143653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem return 0; 143753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav 
Rotem } 1438369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ExtractElement: { 143936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (CanReuseExtract(VL)) { 144036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int DeadCost = 0; 144136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned i = 0, e = VL.size(); i < e; ++i) { 144236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ExtractElementInst *E = cast<ExtractElementInst>(VL[i]); 144336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (E->hasOneUse()) 144436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Take credit for instruction that will become dead. 144536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DeadCost += 144636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i); 144736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 144836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return -DeadCost; 144936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1450369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return getGatherCost(VecTy); 145125961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem } 1452369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ZExt: 1453369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SExt: 1454369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToUI: 1455369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToSI: 1456369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPExt: 1457369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PtrToInt: 1458369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::IntToPtr: 1459369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SIToFP: 1460369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UIToFP: 1461369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 
case Instruction::Trunc: 1462369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPTrunc: 1463369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::BitCast: { 1464369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *SrcTy = VL0->getOperand(0)->getType(); 1465369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1466369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Calculate the cost of this instruction. 1467369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), 1468369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VL0->getType(), SrcTy); 1469369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1470369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size()); 1471369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy); 1472369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return VecCost - ScalarCost; 147353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1474369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FCmp: 1475369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ICmp: 1476369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Select: 1477369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Add: 1478369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FAdd: 1479369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Sub: 1480369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FSub: 1481369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Mul: 1482369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FMul: 1483369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UDiv: 1484369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SDiv: 
1485369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FDiv: 1486369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::URem: 1487369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SRem: 1488369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FRem: 1489369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Shl: 1490369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::LShr: 1491369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::AShr: 1492369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::And: 1493369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Or: 1494369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Xor: { 1495369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Calculate the cost of this instruction. 1496369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int ScalarCost = 0; 1497369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int VecCost = 0; 1498369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp || 1499369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Opcode == Instruction::Select) { 1500369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); 1501369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ScalarCost = VecTy->getNumElements() * 1502369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty()); 1503369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy); 1504369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } else { 15057e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer // Certain instructions can be cheaper to vectorize if they have a 15067e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer // constant second vector 
operand. 15077e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer TargetTransformInfo::OperandValueKind Op1VK = 15087e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer TargetTransformInfo::OK_AnyValue; 15097e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer TargetTransformInfo::OperandValueKind Op2VK = 15107e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer TargetTransformInfo::OK_UniformConstantValue; 151137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TargetTransformInfo::OperandValueProperties Op1VP = 151237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TargetTransformInfo::OP_None; 151337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TargetTransformInfo::OperandValueProperties Op2VP = 151437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TargetTransformInfo::OP_None; 15157e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer 151636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If all operands are exactly the same ConstantInt then set the 151736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // operand kind to OK_UniformConstantValue. 151836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If instead not all operands are constants, then set the operand kind 151936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // to OK_AnyValue. If all operands are constants but not the same, 152036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // then set the operand kind to OK_NonUniformConstantValue. 
1521dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ConstantInt *CInt = nullptr; 152236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned i = 0; i < VL.size(); ++i) { 152336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const Instruction *I = cast<Instruction>(VL[i]); 152436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!isa<ConstantInt>(I->getOperand(1))) { 15257e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer Op2VK = TargetTransformInfo::OK_AnyValue; 15267e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer break; 15277e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer } 152836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (i == 0) { 152936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CInt = cast<ConstantInt>(I->getOperand(1)); 153036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 153136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 153236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && 153336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CInt != cast<ConstantInt>(I->getOperand(1))) 153436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Op2VK = TargetTransformInfo::OK_NonUniformConstantValue; 153536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 153637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // FIXME: Currently cost of model modification for division by 153737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // power of 2 is handled only for X86. Add support for other targets. 
153837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && CInt && 153937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines CInt->getValue().isPowerOf2()) 154037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Op2VP = TargetTransformInfo::OP_PowerOf2; 15417e8cebf22d170769b0bf0c2a69309faa0e36ac4cArnold Schwaighofer 154237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScalarCost = VecTy->getNumElements() * 154337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK, 154437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Op1VP, Op2VP); 154537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK, 154637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Op1VP, Op2VP); 1547369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1548369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return VecCost - ScalarCost; 154953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1550c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::GetElementPtr: { 1551c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OperandValueKind Op1VK = 1552c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OK_AnyValue; 1553c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OperandValueKind Op2VK = 1554c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OK_UniformConstantValue; 1555c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1556c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines int ScalarCost = 1557c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines VecTy->getNumElements() * 1558c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK); 1559c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines int VecCost = 
1560c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK); 1561c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1562c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return VecCost - ScalarCost; 1563c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1564369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Load: { 1565369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Cost of wide load - cost of scalar loads. 1566369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int ScalarLdCost = VecTy->getNumElements() * 1567369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); 15684a6b3a9a770ec2064fb5975ff2d57419c1339a21Arnold Schwaighofer int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0); 1569369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return VecLdCost - ScalarLdCost; 157053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1571369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Store: { 1572369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // We know that we can merge the stores. Calculate the cost. 
1573369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int ScalarStCost = VecTy->getNumElements() * 1574369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); 15754a6b3a9a770ec2064fb5975ff2d57419c1339a21Arnold Schwaighofer int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0); 1576369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return VecStCost - ScalarStCost; 157725961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem } 157836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case Instruction::Call: { 157936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CallInst *CI = cast<CallInst>(VL0); 1580dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); 158136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 158236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Calculate the cost of the scalar and vector calls. 158336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SmallVector<Type*, 4> ScalarTys, VecTys; 1584dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) { 158536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ScalarTys.push_back(CI->getArgOperand(op)->getType()); 158636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(), 158736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines VecTy->getNumElements())); 158836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 158936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 159036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int ScalarCallCost = VecTy->getNumElements() * 159136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys); 159236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 159336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int VecCallCost = 
TTI->getIntrinsicInstrCost(ID, VecTy, VecTys); 159436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 159536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost 159636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines << " (" << VecCallCost << "-" << ScalarCallCost << ")" 1597dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines << " for " << *CI << "\n"); 159836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 159936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return VecCallCost - ScalarCallCost; 160036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1601c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::ShuffleVector: { 1602c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OperandValueKind Op1VK = 1603c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OK_AnyValue; 1604c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OperandValueKind Op2VK = 1605c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OK_AnyValue; 1606c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines int ScalarCost = 0; 1607c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines int VecCost = 0; 1608c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned i = 0; i < VL.size(); ++i) { 1609c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *I = cast<Instruction>(VL[i]); 1610c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (!I) 1611c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines break; 1612c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ScalarCost += 1613c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK); 1614c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1615c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // VecCost is equal to sum of the cost of creating 2 vectors 
1616c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // and the cost of creating shuffle. 1617c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *I0 = cast<Instruction>(VL[0]); 1618c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines VecCost = 1619c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK); 1620c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *I1 = cast<Instruction>(VL[1]); 1621c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines VecCost += 1622c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK); 1623c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines VecCost += 1624c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0); 1625c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return VecCost - ScalarCost; 1626c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1627369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem default: 1628369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem llvm_unreachable("Unknown instruction"); 162953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1630369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 163125961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem 1632d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiangbool BoUpSLP::isFullyVectorizableTinyTree() { 1633d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang DEBUG(dbgs() << "SLP: Check whether the tree with height " << 1634d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang VectorizableTree.size() << " is fully vectorizable .\n"); 1635d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang 1636d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang // We only handle trees of height 2. 
1637d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang if (VectorizableTree.size() != 2) 1638d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang return false; 1639d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang 164036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Handle splat stores. 164136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!VectorizableTree[0].NeedToGather && isSplat(VectorizableTree[1].Scalars)) 164236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 164336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1644d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang // Gathering cost would be too much for tiny trees. 164536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather) 164636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 1647d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang 164836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 1649d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang} 1650d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang 165137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesint BoUpSLP::getSpillCost() { 165237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Walk from the bottom of the tree to the top, tracking which values are 165337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // live. When we see a call instruction that is not part of our tree, 165437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // query TTI to see if there is a cost to keeping values live over it 165537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // (for example, if spills and fills are required). 
165637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned BundleWidth = VectorizableTree.front().Scalars.size(); 165737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Cost = 0; 165837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 165937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallPtrSet<Instruction*, 4> LiveValues; 166037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *PrevInst = nullptr; 166137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 166237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned N = 0; N < VectorizableTree.size(); ++N) { 166337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *Inst = dyn_cast<Instruction>(VectorizableTree[N].Scalars[0]); 166437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!Inst) 166537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 166637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 166737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!PrevInst) { 166837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PrevInst = Inst; 166937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 167037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 167137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 167237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG( 167337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines dbgs() << "SLP: #LV: " << LiveValues.size(); 167437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto *X : LiveValues) 167537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines dbgs() << " " << X->getName(); 167637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines dbgs() << ", Looking at "; 167737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Inst->dump(); 167837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ); 167937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 168037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Update LiveValues. 
168137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LiveValues.erase(PrevInst); 168237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto &J : PrevInst->operands()) { 168337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J)) 168437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LiveValues.insert(cast<Instruction>(&*J)); 168537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 168637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 168737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Now find the sequence of instructions between PrevInst and Inst. 168837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BasicBlock::reverse_iterator InstIt(Inst), PrevInstIt(PrevInst); 168937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines --PrevInstIt; 169037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (InstIt != PrevInstIt) { 169137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (PrevInstIt == PrevInst->getParent()->rend()) { 169237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PrevInstIt = Inst->getParent()->rbegin(); 169337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 169437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 169537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 169637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isa<CallInst>(&*PrevInstIt) && &*PrevInstIt != PrevInst) { 169737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallVector<Type*, 4> V; 169837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto *II : LiveValues) 169937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines V.push_back(VectorType::get(II->getType(), BundleWidth)); 170037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Cost += TTI->getCostOfKeepingLiveOverCall(V); 170137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 170237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 
170337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++PrevInstIt; 170437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 170537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 170637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PrevInst = Inst; 170737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 170837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 170937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: SpillCost=" << Cost << "\n"); 171037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return Cost; 171137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 171237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1713369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getTreeCost() { 1714369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int Cost = 0; 1715369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Calculating cost for tree of size " << 1716369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorizableTree.size() << ".\n"); 1717369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1718d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang // We only vectorize tiny trees if it is fully vectorizable. 
1719d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) { 1720ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (VectorizableTree.empty()) { 172167a38a2875f05ea9c219ab73c4398ee675eb4292Nadav Rotem assert(!ExternalUses.size() && "We should not have any external users"); 172267a38a2875f05ea9c219ab73c4398ee675eb4292Nadav Rotem } 1723085e23841e9c4f4682385fce456704a5f75f9cdcYi Jiang return INT_MAX; 1724a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 1725a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 1726a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem unsigned BundleWidth = VectorizableTree[0].Scalars.size(); 1727a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 1728369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VectorizableTree.size(); i != e; ++i) { 1729369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int C = getEntryCost(&VectorizableTree[i]); 1730369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with " 1731369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem << *VectorizableTree[i].Scalars[0] << " .\n"); 1732369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Cost += C; 173353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1734a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 173536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SmallSet<Value *, 16> ExtractCostCalculated; 1736a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem int ExtractCost = 0; 1737a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end(); 1738a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem I != E; ++I) { 173936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // We only add extract cost once for the same scalar. 
174037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!ExtractCostCalculated.insert(I->Scalar).second) 174137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 174237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 174337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Uses by ephemeral values are free (because the ephemeral value will be 174437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // removed prior to code generation, and so the extraction will be 174537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // removed as well). 174637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (EphValues.count(I->User)) 174736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 1748a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 1749a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth); 1750a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, 1751a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem I->Lane); 1752a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 1753a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 175437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Cost += getSpillCost(); 175537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1756a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem DEBUG(dbgs() << "SLP: Total Cost " << Cost + ExtractCost<< ".\n"); 1757a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem return Cost + ExtractCost; 1758369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 175953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1760369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getGatherCost(Type *Ty) { 1761369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int Cost = 0; 1762369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i) 
1763369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); 1764369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Cost; 1765369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 176653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1767369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getGatherCost(ArrayRef<Value *> VL) { 1768369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Find the type of the operands in VL. 1769369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *ScalarTy = VL[0]->getType(); 1770369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 1771369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ScalarTy = SI->getValueOperand()->getType(); 1772369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); 1773369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Find the cost of inserting/extracting values from the vector. 
1774369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return getGatherCost(VecTy); 177553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem} 177653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1777369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::getPointerOperand(Value *I) { 1778369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (LoadInst *LI = dyn_cast<LoadInst>(I)) 1779369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return LI->getPointerOperand(); 1780369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (StoreInst *SI = dyn_cast<StoreInst>(I)) 1781369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return SI->getPointerOperand(); 1782dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 1783369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 1784ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem 1785369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemunsigned BoUpSLP::getAddressSpaceOperand(Value *I) { 1786369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (LoadInst *L = dyn_cast<LoadInst>(I)) 1787369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return L->getPointerAddressSpace(); 1788369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (StoreInst *S = dyn_cast<StoreInst>(I)) 1789369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return S->getPointerAddressSpace(); 1790369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return -1; 1791369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 1792ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem 17934c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainarbool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) { 1794369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *PtrA = getPointerOperand(A); 1795369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *PtrB = getPointerOperand(B); 1796369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned ASA = getAddressSpaceOperand(A); 
1797369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned ASB = getAddressSpaceOperand(B); 1798ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem 1799369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check that the address spaces match and that the pointers are valid. 1800369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!PtrA || !PtrB || (ASA != ASB)) 1801369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return false; 180253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 18033a7997516982117382b9023ea1176fd53caa948dNadav Rotem // Make sure that A and B are different pointers of the same type. 1804e65b219edbf5d18ed235dc8a5919580f71d2327bNadav Rotem if (PtrA == PtrB || PtrA->getType() != PtrB->getType()) 1805369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return false; 180653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 18074c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); 18085b35d4459222f46000194102bf04d5102c6960cdNadav Rotem Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); 18094c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty)); 18105b35d4459222f46000194102bf04d5102c6960cdNadav Rotem 1811474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); 18124c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); 18134c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); 18145b35d4459222f46000194102bf04d5102c6960cdNadav Rotem 1815474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth APInt OffsetDelta = OffsetB - OffsetA; 18165b35d4459222f46000194102bf04d5102c6960cdNadav Rotem 1817474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth // Check if they are based on the same pointer. 
That makes the offsets 1818474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth // sufficient. 1819474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth if (PtrA == PtrB) 1820474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth return OffsetDelta == Size; 1821dfacdd04cd2dd3b474fcabc5497255548f5506d5Nadav Rotem 1822474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth // Compute the necessary base pointer delta to have the necessary final delta 1823474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth // equal to the size. 1824474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth APInt BaseDelta = Size - OffsetDelta; 182539f59f4d95de11c3c39bf6753a555ac32cacf7b7Nadav Rotem 1826474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth // Otherwise compute the distance with SCEV between the base pointers. 1827369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem const SCEV *PtrSCEVA = SE->getSCEV(PtrA); 1828369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem const SCEV *PtrSCEVB = SE->getSCEV(PtrB); 1829474be0d0f83eb6543bd4091946b40bb4967a3c11Chandler Carruth const SCEV *C = SE->getConstant(BaseDelta); 1830a38edf071dbc76b2e0525485ea4c368cee908373Nadav Rotem const SCEV *X = SE->getAddExpr(PtrSCEVA, C); 1831a38edf071dbc76b2e0525485ea4c368cee908373Nadav Rotem return X == PtrSCEVB; 1832369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 183353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1834ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Reorder commutative operations in alternate shuffle if the resulting vectors 1835ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// are consecutive loads. This would allow us to vectorize the tree. 
// If we have something like-
// load a[0] - load b[0]
// load b[1] + load a[1]
// load a[2] - load b[2]
// load a[3] + load b[3]
// Reordering the operands of the second (commutative) operation to
// load a[1] + load b[1] would allow us to vectorize this code.
//
// \p VL holds the bundle of two-operand instructions. \p Left and \p Right
// receive operand 0 and operand 1 of every instruction in \p VL, in order,
// with the operands of individual lanes swapped where that helps. Both
// vectors are appended to and then indexed from 0, so they are expected to be
// empty on entry.
void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
                                        SmallVectorImpl<Value *> &Left,
                                        SmallVectorImpl<Value *> &Right) {
  const DataLayout &DL = F->getParent()->getDataLayout();

  // Push left and right operands of binary operation into Left and Right
  for (Value *V : VL) {
    Instruction *I = cast<Instruction>(V);
    Left.push_back(I->getOperand(0));
    Right.push_back(I->getOperand(1));
  }

  // If First (an operand of lane Lane) and Second (an operand of lane
  // Lane + 1) are consecutive loads that ended up on opposite sides, swap the
  // operands of whichever of the two lanes is commutative, preferring the
  // earlier lane. Returns true if a swap was performed.
  auto SwapIfConsecutive = [&](Value *First, Value *Second,
                               unsigned Lane) -> bool {
    LoadInst *L = dyn_cast<LoadInst>(First);
    if (!L)
      return false;
    LoadInst *L1 = dyn_cast<LoadInst>(Second);
    if (!L1 || !isConsecutiveAccess(L, L1, DL))
      return false;

    unsigned SwapLane;
    if (cast<Instruction>(VL[Lane])->isCommutative())
      SwapLane = Lane;
    else if (cast<Instruction>(VL[Lane + 1])->isCommutative())
      SwapLane = Lane + 1;
    else
      return false; // Neither lane can be reordered; leave unchanged.

    std::swap(Left[SwapLane], Right[SwapLane]);
    return true;
  };

  // Reorder if we have a commutative operation and consecutive access
  // are on either side of the alternate instructions.
  //
  // The bound is written as "j + 1 < e" rather than "j < e - 1" so that an
  // empty VL does not wrap around in unsigned arithmetic.
  for (unsigned j = 0, e = VL.size(); j + 1 < e; ++j) {
    // Left operand of lane j followed by right operand of lane j + 1.
    if (SwapIfConsecutive(Left[j], Right[j + 1], j))
      continue;
    // Right operand of lane j followed by left operand of lane j + 1.
    SwapIfConsecutive(Right[j], Left[j + 1], j);
  }
}

1888ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesvoid BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL, 1889ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Left, 1890ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Right) { 1891ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1892ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVector<Value *, 16> OrigLeft, OrigRight; 1893ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1894ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool AllSameOpcodeLeft = true; 1895ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool AllSameOpcodeRight = true; 1896ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines for (unsigned i = 0, e = VL.size(); i != e; ++i) { 1897ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Instruction *I = cast<Instruction>(VL[i]); 1898ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Value *VLeft = I->getOperand(0); 1899ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Value *VRight = I->getOperand(1); 1900ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
1901ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines OrigLeft.push_back(VLeft); 1902ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines OrigRight.push_back(VRight); 1903ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1904ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Instruction *ILeft = dyn_cast<Instruction>(VLeft); 1905ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Instruction *IRight = dyn_cast<Instruction>(VRight); 1906ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1907ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Check whether all operands on one side have the same opcode. In this case 1908ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // we want to preserve the original order and not make things worse by 1909ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // reordering. 1910ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (i && AllSameOpcodeLeft && ILeft) { 1911ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (Instruction *PLeft = dyn_cast<Instruction>(OrigLeft[i - 1])) { 1912ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (PLeft->getOpcode() != ILeft->getOpcode()) 1913ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AllSameOpcodeLeft = false; 1914ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } else 1915ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AllSameOpcodeLeft = false; 1916ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1917ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (i && AllSameOpcodeRight && IRight) { 1918ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (Instruction *PRight = dyn_cast<Instruction>(OrigRight[i - 1])) { 1919ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (PRight->getOpcode() != IRight->getOpcode()) 1920ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AllSameOpcodeRight = false; 1921ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } else 
1922ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AllSameOpcodeRight = false; 1923ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1924ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1925ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Sort two opcodes. In the code below we try to preserve the ability to use 1926ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // broadcast of values instead of individual inserts. 1927ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // vl1 = load 1928ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // vl2 = phi 1929ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // vr1 = load 1930ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // vr2 = vr2 1931ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // = vl1 x vr1 1932ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // = vl2 x vr2 1933ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // If we just sorted according to opcode we would leave the first line in 1934ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // tact but we would swap vl2 with vr2 because opcode(phi) > opcode(load). 1935ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // = vl1 x vr1 1936ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // = vr2 x vl2 1937ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Because vr2 and vr1 are from the same load we loose the opportunity of a 1938ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // broadcast for the packed right side in the backend: we have [vr1, vl2] 1939ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // instead of [vr1, vr2=vr1]. 
1940ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (ILeft && IRight) { 1941ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!i && ILeft->getOpcode() > IRight->getOpcode()) { 1942ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Left.push_back(IRight); 1943ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Right.push_back(ILeft); 1944ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } else if (i && ILeft->getOpcode() > IRight->getOpcode() && 1945ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Right[i - 1] != IRight) { 1946ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Try not to destroy a broad cast for no apparent benefit. 1947ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Left.push_back(IRight); 1948ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Right.push_back(ILeft); 1949ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } else if (i && ILeft->getOpcode() == IRight->getOpcode() && 1950ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Right[i - 1] == ILeft) { 1951ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Try preserve broadcasts. 1952ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Left.push_back(IRight); 1953ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Right.push_back(ILeft); 1954ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } else if (i && ILeft->getOpcode() == IRight->getOpcode() && 1955ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Left[i - 1] == IRight) { 1956ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Try preserve broadcasts. 
1957ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Left.push_back(IRight); 1958ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Right.push_back(ILeft); 1959ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } else { 1960ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Left.push_back(ILeft); 1961ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Right.push_back(IRight); 1962ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1963ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines continue; 1964ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1965ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // One opcode, put the instruction on the right. 1966ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (ILeft) { 1967ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Left.push_back(VRight); 1968ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Right.push_back(ILeft); 1969ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines continue; 1970ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1971ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Left.push_back(VLeft); 1972ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Right.push_back(VRight); 1973ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1974ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1975ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool LeftBroadcast = isSplat(Left); 1976ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool RightBroadcast = isSplat(Right); 1977ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1978ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // If operands end up being broadcast return this operand order. 
1979ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LeftBroadcast || RightBroadcast) 1980ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return; 1981ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1982ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Don't reorder if the operands where good to begin. 1983ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (AllSameOpcodeRight || AllSameOpcodeLeft) { 1984ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Left = OrigLeft; 1985ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Right = OrigRight; 1986ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1987ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 19884c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = F->getParent()->getDataLayout(); 19894c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar 1990ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Finally check if we can get longer vectorizable chain by reordering 1991ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // without breaking the good operand order detected above. 1992ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // E.g. If we have something like- 1993ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // load a[0] load b[0] 1994ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // load b[1] load a[1] 1995ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // load a[2] load b[2] 1996ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // load a[3] load b[3] 1997ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Reordering the second load b[1] load a[1] would allow us to vectorize 1998ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // this code and we still retain AllSameOpcode property. 
1999ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // FIXME: This load reordering might break AllSameOpcode in some rare cases 2000ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // such as- 2001ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // add a[0],c[0] load b[0] 2002ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // add a[1],c[2] load b[1] 2003ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // b[2] load b[2] 2004ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // add a[3],c[3] load b[3] 2005ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines for (unsigned j = 0; j < VL.size() - 1; ++j) { 2006ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) { 2007ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) { 20084c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (isConsecutiveAccess(L, L1, DL)) { 2009ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines std::swap(Left[j + 1], Right[j + 1]); 2010ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines continue; 2011ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2012ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2013ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2014ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) { 2015ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) { 20164c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (isConsecutiveAccess(L, L1, DL)) { 2017ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines std::swap(Left[j + 1], Right[j + 1]); 2018ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines continue; 2019ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2020ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2021ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 
2022ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // else unchanged 2023ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2024ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 2025ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 20264b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenaultvoid BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) { 20274b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault Instruction *VL0 = cast<Instruction>(VL[0]); 202837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BasicBlock::iterator NextInst = VL0; 20294b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault ++NextInst; 20304b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault Builder.SetInsertPoint(VL0->getParent(), NextInst); 20314b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); 20324b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault} 20334b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault 2034369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) { 203553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Value *Vec = UndefValue::get(Ty); 203653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // Generate the 'InsertElement' instruction. 203753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem for (unsigned i = 0; i < Ty->getNumElements(); ++i) { 203853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); 2039a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) { 2040a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem GatherSeq.insert(Insrt); 2041a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling CSEBlocks.insert(Insrt->getParent()); 2042a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2043a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Add to our 'need-to-extract' list. 
2044a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem if (ScalarToTreeEntry.count(VL[i])) { 2045a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem int Idx = ScalarToTreeEntry[VL[i]]; 2046a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem TreeEntry *E = &VectorizableTree[Idx]; 2047a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Find which lane we need to extract. 2048a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem int FoundLane = -1; 2049a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem for (unsigned Lane = 0, LE = VL.size(); Lane != LE; ++Lane) { 2050a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Is this the lane of the scalar that we are looking for ? 2051a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem if (E->Scalars[Lane] == VL[i]) { 2052a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem FoundLane = Lane; 2053a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem break; 2054a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 2055a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 2056a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem assert(FoundLane >= 0 && "Could not find the correct lane"); 2057a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem ExternalUses.push_back(ExternalUser(VL[i], Insrt, FoundLane)); 2058a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 2059a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 206053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 206153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 206253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem return Vec; 206353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem} 206453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 20656a804acc4ae77c014e4ef97c37f8e720ef360394Matt ArsenaultValue *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const { 20666a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault SmallDenseMap<Value*, int>::const_iterator Entry 20676a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault = ScalarToTreeEntry.find(VL[0]); 
20686a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault if (Entry != ScalarToTreeEntry.end()) { 20696a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault int Idx = Entry->second; 20706a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault const TreeEntry *En = &VectorizableTree[Idx]; 207162657090de3a5731bf644437701ccd78c247119fNadav Rotem if (En->isSame(VL) && En->VectorizedValue) 207262657090de3a5731bf644437701ccd78c247119fNadav Rotem return En->VectorizedValue; 207362657090de3a5731bf644437701ccd78c247119fNadav Rotem } 2074dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 207562657090de3a5731bf644437701ccd78c247119fNadav Rotem} 207662657090de3a5731bf644437701ccd78c247119fNadav Rotem 2077369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) { 2078369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (ScalarToTreeEntry.count(VL[0])) { 2079369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int Idx = ScalarToTreeEntry[VL[0]]; 2080369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TreeEntry *E = &VectorizableTree[Idx]; 2081369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (E->isSame(VL)) 2082369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return vectorizeTree(E); 2083369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 208453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 208553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Type *ScalarTy = VL[0]->getType(); 208653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 208753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem ScalarTy = SI->getValueOperand()->getType(); 208853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); 208953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2090369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Gather(VL, VecTy); 2091369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 
2092369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 2093369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::vectorizeTree(TreeEntry *E) { 2094adb412daa41aef94a9f724dfd1ade9f579bb3a84Benjamin Kramer IRBuilder<>::InsertPointGuard Guard(Builder); 209553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2096369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (E->VectorizedValue) { 2097369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n"); 2098369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return E->VectorizedValue; 209953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 210053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 21011b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault Instruction *VL0 = cast<Instruction>(E->Scalars[0]); 21021b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault Type *ScalarTy = VL0->getType(); 21031b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault if (StoreInst *SI = dyn_cast<StoreInst>(VL0)) 2104369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ScalarTy = SI->getValueOperand()->getType(); 2105369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size()); 210653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2107369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (E->NeedToGather) { 21084b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 2109369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Gather(E->Scalars, VecTy); 2110369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 211137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 21124c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = F->getParent()->getDataLayout(); 2113c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned Opcode = getSameOpcode(E->Scalars); 2114805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 
2115369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem switch (Opcode) { 2116369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PHI: { 2117369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem PHINode *PH = dyn_cast<PHINode>(VL0); 2118d237e834a816399b7e1561dd4db2c501f5095712Justin Bogner Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI()); 211979c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem Builder.SetCurrentDebugLocation(PH->getDebugLoc()); 2120369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues()); 2121369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = NewPhi; 2122369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 2123353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem // PHINodes may have multiple entries from the same block. We want to 2124353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem // visit every block once. 2125353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem SmallSet<BasicBlock*, 4> VisitedBBs; 2126353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem 2127369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { 2128369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Operands; 2129369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BasicBlock *IBB = PH->getIncomingBlock(i); 2130369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 213137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!VisitedBBs.insert(IBB).second) { 2132353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB); 2133353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem continue; 2134353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem } 2135353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem 2136369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Prepare the operand vector. 
2137369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned j = 0; j < E->Scalars.size(); ++j) 2138369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Operands.push_back(cast<PHINode>(E->Scalars[j])-> 2139369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem getIncomingValueForBlock(IBB)); 2140369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 2141369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Builder.SetInsertPoint(IBB->getTerminator()); 214279c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem Builder.SetCurrentDebugLocation(PH->getDebugLoc()); 2143369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *Vec = vectorizeTree(Operands); 2144369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem NewPhi->addIncoming(Vec, IBB); 2145369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2146805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 2147369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() && 2148369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem "Invalid number of incoming values"); 2149369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return NewPhi; 2150805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem } 2151805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 2152369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ExtractElement: { 2153369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (CanReuseExtract(E->Scalars)) { 2154369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *V = VL0->getOperand(0); 2155369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = V; 2156369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return V; 2157369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2158369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Gather(E->Scalars, VecTy); 215953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2160369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ZExt: 
2161369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SExt: 2162369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToUI: 2163369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToSI: 2164369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPExt: 2165369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PtrToInt: 2166369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::IntToPtr: 2167369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SIToFP: 2168369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UIToFP: 2169369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Trunc: 2170369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPTrunc: 2171369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::BitCast: { 2172369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList INVL; 2173369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 0, e = E->Scalars.size(); i < e; ++i) 2174369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem INVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); 2175369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 21764b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 217779c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2178369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *InVec = vectorizeTree(INVL); 217962657090de3a5731bf644437701ccd78c247119fNadav Rotem 218062657090de3a5731bf644437701ccd78c247119fNadav Rotem if (Value *V = alreadyVectorized(E->Scalars)) 218162657090de3a5731bf644437701ccd78c247119fNadav Rotem return V; 218262657090de3a5731bf644437701ccd78c247119fNadav Rotem 2183369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem CastInst *CI = dyn_cast<CastInst>(VL0); 2184369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *V = Builder.CreateCast(CI->getOpcode(), InVec, 
VecTy); 2185369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = V; 218637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2187369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return V; 218853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FCmp: 2190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ICmp: { 2191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList LHSV, RHSV; 2192369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 0, e = E->Scalars.size(); i < e; ++i) { 2193369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem LHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); 2194369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem RHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); 2195369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 219653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 21974b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 219879c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2199369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *L = vectorizeTree(LHSV); 2200369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *R = vectorizeTree(RHSV); 220162657090de3a5731bf644437701ccd78c247119fNadav Rotem 220262657090de3a5731bf644437701ccd78c247119fNadav Rotem if (Value *V = alreadyVectorized(E->Scalars)) 220362657090de3a5731bf644437701ccd78c247119fNadav Rotem return V; 220453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 22050c7f116bb6950ef819323d855415b2f2b0aad987Pirama Arumuga Nainar CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate(); 220662657090de3a5731bf644437701ccd78c247119fNadav Rotem Value *V; 2207369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Opcode == Instruction::FCmp) 2208369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem V = Builder.CreateFCmp(P0, L, R); 
2209369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem else 2210369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem V = Builder.CreateICmp(P0, L, R); 221153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2212369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = V; 221337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2214369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return V; 221553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2216369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Select: { 2217369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList TrueVec, FalseVec, CondVec; 2218369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 0, e = E->Scalars.size(); i < e; ++i) { 2219369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem CondVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); 2220369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TrueVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); 2221369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem FalseVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(2)); 2222369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 222353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 22244b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 222579c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2226369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *Cond = vectorizeTree(CondVec); 2227369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *True = vectorizeTree(TrueVec); 2228369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *False = vectorizeTree(FalseVec); 222962657090de3a5731bf644437701ccd78c247119fNadav Rotem 223062657090de3a5731bf644437701ccd78c247119fNadav Rotem if (Value *V = alreadyVectorized(E->Scalars)) 223162657090de3a5731bf644437701ccd78c247119fNadav Rotem return V; 223257aa3aad33b50583d5a82735777d0f0dc03ff122Matt Arsenault 
2233369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *V = Builder.CreateSelect(Cond, True, False); 2234369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = V; 223537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2236369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return V; 223753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2238369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Add: 2239369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FAdd: 2240369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Sub: 2241369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FSub: 2242369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Mul: 2243369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FMul: 2244369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UDiv: 2245369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SDiv: 2246369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FDiv: 2247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::URem: 2248369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SRem: 2249369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FRem: 2250369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Shl: 2251369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::LShr: 2252369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::AShr: 2253369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::And: 2254369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Or: 2255369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Xor: { 2256369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList LHSVL, RHSVL; 2257af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer if 
(isa<BinaryOperator>(VL0) && VL0->isCommutative()) 2258af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL); 2259af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer else 2260af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer for (int i = 0, e = E->Scalars.size(); i < e; ++i) { 2261af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); 2262af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); 2263af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer } 226453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 22654b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 226679c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2267369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *LHS = vectorizeTree(LHSVL); 2268369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *RHS = vectorizeTree(RHSVL); 226953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2270369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (LHS == RHS && isa<Instruction>(LHS)) { 2271369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem assert((VL0->getOperand(0) == VL0->getOperand(1)) && "Invalid order"); 2272369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 227353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 227462657090de3a5731bf644437701ccd78c247119fNadav Rotem if (Value *V = alreadyVectorized(E->Scalars)) 227562657090de3a5731bf644437701ccd78c247119fNadav Rotem return V; 227662657090de3a5731bf644437701ccd78c247119fNadav Rotem 2277369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BinaryOperator *BinOp = cast<BinaryOperator>(VL0); 2278369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS); 2279369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = 
V; 228037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines propagateIRFlags(E->VectorizedValue, E->Scalars); 228137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2282fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling 2283fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling if (Instruction *I = dyn_cast<Instruction>(V)) 2284fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling return propagateMetadata(I, E->Scalars); 2285fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling 2286369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return V; 2287369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2288369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Load: { 2289369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Loads are inserted at the head of the tree because we don't want to 2290369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // sink them all the way down past store instructions. 22914b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 229279c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2293369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem LoadInst *LI = cast<LoadInst>(VL0); 229437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Type *ScalarLoadTy = LI->getType(); 22959e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault unsigned AS = LI->getPointerAddressSpace(); 22969e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault 22979e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), 22989e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault VecTy->getPointerTo(AS)); 229937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 230037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The pointer operand uses an in-tree scalar so we add the new BitCast to 230137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // ExternalUses list to make sure that an extract will be generated in the 
230237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // future. 230337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ScalarToTreeEntry.count(LI->getPointerOperand())) 230437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ExternalUses.push_back( 230537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ExternalUser(LI->getPointerOperand(), cast<User>(VecPtr), 0)); 230637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2307369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned Alignment = LI->getAlignment(); 2308369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem LI = Builder.CreateLoad(VecPtr); 23094c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!Alignment) { 23104c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar Alignment = DL.getABITypeAlignment(ScalarLoadTy); 23114c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar } 2312369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem LI->setAlignment(Alignment); 2313369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = LI; 231437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2315fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling return propagateMetadata(LI, E->Scalars); 2316369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2317369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Store: { 2318369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem StoreInst *SI = cast<StoreInst>(VL0); 2319369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned Alignment = SI->getAlignment(); 23209e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault unsigned AS = SI->getPointerAddressSpace(); 2321369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 2322369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList ValueOp; 2323369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 0, e = E->Scalars.size(); i < e; ++i) 2324369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 
ValueOp.push_back(cast<StoreInst>(E->Scalars[i])->getValueOperand()); 2325369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 23264b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 232779c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2328369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *VecValue = vectorizeTree(ValueOp); 23299e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), 23309e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault VecTy->getPointerTo(AS)); 2331369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem StoreInst *S = Builder.CreateStore(VecValue, VecPtr); 233237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 233337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The pointer operand uses an in-tree scalar so we add the new BitCast to 233437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // ExternalUses list to make sure that an extract will be generated in the 233537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // future. 
233637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ScalarToTreeEntry.count(SI->getPointerOperand())) 233737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ExternalUses.push_back( 233837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0)); 233937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 23404c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!Alignment) { 23414c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar Alignment = DL.getABITypeAlignment(SI->getValueOperand()->getType()); 23424c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar } 2343369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem S->setAlignment(Alignment); 2344369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = S; 234537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2346fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling return propagateMetadata(S, E->Scalars); 2347369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2348c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::GetElementPtr: { 2349c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines setInsertPointAfterBundle(E->Scalars); 2350c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2351c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ValueList Op0VL; 2352c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (int i = 0, e = E->Scalars.size(); i < e; ++i) 2353c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0)); 2354c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2355c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *Op0 = vectorizeTree(Op0VL); 2356c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2357c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines std::vector<Value *> OpVecs; 2358c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (int j = 1, e 
= cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e; 2359c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ++j) { 2360c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ValueList OpVL; 2361c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (int i = 0, e = E->Scalars.size(); i < e; ++i) 2362c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j)); 2363c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2364c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *OpVec = vectorizeTree(OpVL); 2365c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines OpVecs.push_back(OpVec); 2366c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 2367c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 23684c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar Value *V = Builder.CreateGEP( 23694c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs); 2370c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines E->VectorizedValue = V; 237137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2372c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2373c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Instruction *I = dyn_cast<Instruction>(V)) 2374c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return propagateMetadata(I, E->Scalars); 2375c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2376c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return V; 2377c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 237836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case Instruction::Call: { 237936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CallInst *CI = cast<CallInst>(VL0); 238036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setInsertPointAfterBundle(E->Scalars); 2381c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Function *FI; 
2382c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Intrinsic::ID IID = Intrinsic::not_intrinsic; 238337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Value *ScalarArg = nullptr; 2384c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (CI && (FI = CI->getCalledFunction())) { 2385c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines IID = (Intrinsic::ID) FI->getIntrinsicID(); 2386c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 238736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines std::vector<Value *> OpVecs; 238836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) { 238936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ValueList OpVL; 2390c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // ctlz,cttz and powi are special intrinsics whose second argument is 2391c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // a scalar. This argument should not be vectorized. 2392c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) { 2393c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines CallInst *CEI = cast<CallInst>(E->Scalars[0]); 239437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScalarArg = CEI->getArgOperand(j); 2395c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines OpVecs.push_back(CEI->getArgOperand(j)); 2396c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines continue; 2397c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 239836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (int i = 0, e = E->Scalars.size(); i < e; ++i) { 239936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CallInst *CEI = cast<CallInst>(E->Scalars[i]); 240036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OpVL.push_back(CEI->getArgOperand(j)); 240136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 240236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 240336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 
Value *OpVec = vectorizeTree(OpVL); 240436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n"); 240536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OpVecs.push_back(OpVec); 240636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 240736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 240836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Module *M = F->getParent(); 2409dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); 241036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) }; 241136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Function *CF = Intrinsic::getDeclaration(M, ID, Tys); 241236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Value *V = Builder.CreateCall(CF, OpVecs); 241337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 241437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The scalar argument uses an in-tree scalar so we add the new vectorized 241537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // call to ExternalUses list to make sure that an extract will be 241637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // generated in the future. 
241737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ScalarArg && ScalarToTreeEntry.count(ScalarArg)) 241837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0)); 241937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 242036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines E->VectorizedValue = V; 242137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 242236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return V; 242336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 2424c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::ShuffleVector: { 2425c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ValueList LHSVL, RHSVL; 2426ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines assert(isa<BinaryOperator>(VL0) && "Invalid Shuffle Vector Operand"); 2427ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines reorderAltShuffleOperands(E->Scalars, LHSVL, RHSVL); 2428c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines setInsertPointAfterBundle(E->Scalars); 2429c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2430c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *LHS = vectorizeTree(LHSVL); 2431c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *RHS = vectorizeTree(RHSVL); 2432c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2433c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Value *V = alreadyVectorized(E->Scalars)) 2434c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return V; 2435c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2436c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Create a vector of LHS op1 RHS 2437c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines BinaryOperator *BinOp0 = cast<BinaryOperator>(VL0); 2438c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS); 
2439c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2440c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Create a vector of LHS op2 RHS 2441c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *VL1 = cast<Instruction>(E->Scalars[1]); 2442c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines BinaryOperator *BinOp1 = cast<BinaryOperator>(VL1); 2443c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS); 2444c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 244537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Create shuffle to take alternate operations from the vector. 244637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Also, gather up odd and even scalar ops to propagate IR flags to 244737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // each vector operation. 244837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ValueList OddScalars, EvenScalars; 2449c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned e = E->Scalars.size(); 245037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallVector<Constant *, 8> Mask(e); 2451c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned i = 0; i < e; ++i) { 245237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (i & 1) { 2453c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Mask[i] = Builder.getInt32(e + i); 245437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines OddScalars.push_back(E->Scalars[i]); 245537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 2456c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Mask[i] = Builder.getInt32(i); 245737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines EvenScalars.push_back(E->Scalars[i]); 245837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 2459c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 2460c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2461c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 
Value *ShuffleMask = ConstantVector::get(Mask); 246237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines propagateIRFlags(V0, EvenScalars); 246337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines propagateIRFlags(V1, OddScalars); 2464c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2465c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask); 2466c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines E->VectorizedValue = V; 246737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2468c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Instruction *I = dyn_cast<Instruction>(V)) 2469c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return propagateMetadata(I, E->Scalars); 2470c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2471c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return V; 2472c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 2473369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem default: 2474369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem llvm_unreachable("unknown inst"); 247553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2476dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 2477369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 247853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2479a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold SchwaighoferValue *BoUpSLP::vectorizeTree() { 248037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 248137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // All blocks must be scheduled before any instructions are inserted. 
248237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto &BSIter : BlocksSchedules) { 248337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines scheduleBlock(BSIter.second.get()); 248437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 248537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 248629b741942807bc4c0441d98f1330b70446794b88Nadav Rotem Builder.SetInsertPoint(F->getEntryBlock().begin()); 2487369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem vectorizeTree(&VectorizableTree[0]); 248853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2489a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n"); 2490a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2491a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Extract all of the elements with the external uses. 2492a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem for (UserList::iterator it = ExternalUses.begin(), e = ExternalUses.end(); 2493a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem it != e; ++it) { 2494a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem Value *Scalar = it->Scalar; 2495a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem llvm::User *User = it->User; 2496523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem 2497523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem // Skip users that we already RAUW. This happens when one instruction 2498523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem // has multiple uses of the same value. 
249936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (std::find(Scalar->user_begin(), Scalar->user_end(), User) == 250036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Scalar->user_end()) 2501a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem continue; 2502a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar"); 2503a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2504a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem int Idx = ScalarToTreeEntry[Scalar]; 2505a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem TreeEntry *E = &VectorizableTree[Idx]; 2506a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem assert(!E->NeedToGather && "Extracting from a gather list"); 2507a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2508a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem Value *Vec = E->VectorizedValue; 2509a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem assert(Vec && "Can't find vectorizable value"); 2510a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2511f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem Value *Lane = Builder.getInt32(it->Lane); 2512a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Generate extracts for out-of-tree users. 2513a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Find the insertion point for the extractelement lane. 
251436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isa<Instruction>(Vec)){ 2515523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem if (PHINode *PH = dyn_cast<PHINode>(User)) { 2516523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) { 2517523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem if (PH->getIncomingValue(i) == Scalar) { 2518f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator()); 2519f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem Value *Ex = Builder.CreateExtractElement(Vec, Lane); 2520a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling CSEBlocks.insert(PH->getIncomingBlock(i)); 2521f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem PH->setOperand(i, Ex); 2522523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem } 2523523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem } 2524523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem } else { 2525f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem Builder.SetInsertPoint(cast<Instruction>(User)); 2526f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem Value *Ex = Builder.CreateExtractElement(Vec, Lane); 2527a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling CSEBlocks.insert(cast<Instruction>(User)->getParent()); 2528f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem User->replaceUsesOfWith(Scalar, Ex); 2529523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem } 2530a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } else { 2531f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem Builder.SetInsertPoint(F->getEntryBlock().begin()); 2532f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem Value *Ex = Builder.CreateExtractElement(Vec, Lane); 2533a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling CSEBlocks.insert(&F->getEntryBlock()); 2534f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem User->replaceUsesOfWith(Scalar, Ex); 2535a8608b8dfd79756765862cf40aeab544cd10c188Nadav 
Rotem } 2536a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2537a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); 2538a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 2539a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2540369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // For each vectorized value: 2541369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int EIdx = 0, EE = VectorizableTree.size(); EIdx < EE; ++EIdx) { 2542369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TreeEntry *Entry = &VectorizableTree[EIdx]; 254353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2544369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // For each lane: 2545369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { 2546369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *Scalar = Entry->Scalars[Lane]; 2547369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // No need to handle users of gathered values. 
2548369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Entry->NeedToGather) 2549369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem continue; 255053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2551ace9ed50b549667eff8e19eb76f7714a3a6161aeNadav Rotem assert(Entry->VectorizedValue && "Can't find vectorizable value"); 2552ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem 2553369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *Ty = Scalar->getType(); 2554369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!Ty->isVoidTy()) { 255536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#ifndef NDEBUG 255636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (User *U : Scalar->users()) { 255736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); 2558a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 255936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines assert((ScalarToTreeEntry.count(U) || 2560dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // It is legal to replace users in the ignorelist by undef. 
2561dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) != 2562dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines UserIgnoreList.end())) && 2563369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem "Replacing out-of-tree value with undef"); 2564369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 256536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#endif 2566369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *Undef = UndefValue::get(Ty); 2567369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Scalar->replaceAllUsesWith(Undef); 2568369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2569369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); 2570ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines eraseInstruction(cast<Instruction>(Scalar)); 2571ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem } 2572ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem } 2573ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem 2574c7ffbc019fdc6ae5265f1841eaabae34e301f59bNadav Rotem Builder.ClearInsertionPoint(); 2575a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 2576a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return VectorizableTree[0].VectorizedValue; 257753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem} 257853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2579369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemvoid BoUpSLP::optimizeGatherSequence() { 2580369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() 2581369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem << " gather sequences instructions.\n"); 25826959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem // LICM InsertElementInst sequences. 
258353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem for (SetVector<Instruction *>::iterator it = GatherSeq.begin(), 25846959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem e = GatherSeq.end(); it != e; ++it) { 25856959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it); 258653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 258753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (!Insert) 258853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem continue; 258953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 259053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // Check if this block is inside a loop. 25916959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem Loop *L = LI->getLoopFor(Insert->getParent()); 259253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (!L) 25936959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem continue; 259453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 259553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // Check if it has a preheader. 259653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem BasicBlock *PreHeader = L->getLoopPreheader(); 259753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (!PreHeader) 259829acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem continue; 259953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 260053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // If the vector or the element that we insert into it are 260153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // instructions that are defined in this basic block then we can't 260253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // hoist this instruction. 
260353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0)); 260453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1)); 260553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (CurrVec && L->contains(CurrVec)) 260653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem continue; 260753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (NewElem && L->contains(NewElem)) 260853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem continue; 260953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 261053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // We can hoist this instruction. Move it to the pre-header. 26116959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem Insert->moveBefore(PreHeader->getTerminator()); 26126959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem } 26136959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem 2614dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Make a list of all reachable blocks in our CSE queue. 2615dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVector<const DomTreeNode *, 8> CSEWorkList; 2616dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CSEWorkList.reserve(CSEBlocks.size()); 2617dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (BasicBlock *BB : CSEBlocks) 2618dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (DomTreeNode *N = DT->getNode(BB)) { 2619dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(DT->isReachableFromEntry(N)); 2620dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CSEWorkList.push_back(N); 2621dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 2622dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 26230c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer // Sort blocks by domination. This ensures we visit a block after all blocks 26240c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer // dominating it are visited. 
262536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), 2626dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines [this](const DomTreeNode *A, const DomTreeNode *B) { 262736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return DT->properlyDominates(A, B); 262836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines }); 26290c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer 26306959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem // Perform O(N^2) search over the gather sequences and merge identical 26316959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem // instructions. TODO: We can further optimize this scan if we split the 26326959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem // instructions into different buckets based on the insert lane. 26330c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer SmallVector<Instruction *, 16> Visited; 2634dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) { 263536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) && 26360c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer "Worklist not sorted properly!"); 2637dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BasicBlock *BB = (*I)->getBlock(); 26380c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer // For all instructions in blocks containing gather sequences: 26390c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { 26400c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer Instruction *In = it++; 2641a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) 26426959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem continue; 26436959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem 
264429acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem // Check if we can replace this instruction with any of the 264529acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem // visited instructions. 26460c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer for (SmallVectorImpl<Instruction *>::iterator v = Visited.begin(), 26470c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer ve = Visited.end(); 26480c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer v != ve; ++v) { 2649523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem if (In->isIdenticalTo(*v) && 2650523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem DT->dominates((*v)->getParent(), In->getParent())) { 2651523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem In->replaceAllUsesWith(*v); 2652ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines eraseInstruction(In); 2653dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines In = nullptr; 26546959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem break; 26556959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem } 26566959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem } 26570c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer if (In) { 26580c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer assert(std::find(Visited.begin(), Visited.end(), In) == Visited.end()); 26590c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer Visited.push_back(In); 26600c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer } 26616959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem } 266253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2663a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling CSEBlocks.clear(); 2664a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling GatherSeq.clear(); 266553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem} 266653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 266737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// Groups the instructions to a bundle (which is then a single scheduling entity) 
266837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// and schedules instructions until the bundle gets ready. 266937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, 2670ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BoUpSLP *SLP) { 267137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isa<PHINode>(VL[0])) 267237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 267337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 267437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Initialize the instruction bundle. 267537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *OldScheduleEnd = ScheduleEnd; 267637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *PrevInBundle = nullptr; 267737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *Bundle = nullptr; 267837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool ReSchedule = false; 267937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n"); 268037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (Value *V : VL) { 268137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines extendSchedulingRegion(V); 268237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *BundleMember = getScheduleData(V); 268337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(BundleMember && 268437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "no ScheduleData for bundle member (maybe not in same basic block)"); 268537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (BundleMember->IsScheduled) { 268637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // A bundle member was scheduled as single instruction before and now 268737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // needs to be scheduled as part of the bundle. 
We just get rid of the 268837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // existing schedule. 268937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: reset schedule because " << *BundleMember 269037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines << " was already scheduled\n"); 269137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReSchedule = true; 269237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 269337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(BundleMember->isSchedulingEntity() && 269437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "bundle member already part of other bundle"); 269537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (PrevInBundle) { 269637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PrevInBundle->NextInBundle = BundleMember; 269737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 269837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Bundle = BundleMember; 269937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 270037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->UnscheduledDepsInBundle = 0; 270137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps; 270237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 270337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Group the instructions to a bundle. 270437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->FirstInBundle = Bundle; 270537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PrevInBundle = BundleMember; 270637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 270737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ScheduleEnd != OldScheduleEnd) { 270837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The scheduling region got new instructions at the lower end (or it is a 270937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // new region for the first bundle). 
This makes it necessary to 271037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // recalculate all dependencies. 271137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // It is seldom that this needs to be done a second time after adding the 271237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // initial bundle to the region. 271337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) { 271437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = getScheduleData(I); 271537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->clearDependencies(); 271637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 271737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReSchedule = true; 271837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 271937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ReSchedule) { 272037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines resetSchedule(); 272137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines initialFillReadyList(ReadyInsts); 272237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 272337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 272437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block " 272537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines << BB->getName() << "\n"); 272637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2727ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines calculateDependencies(Bundle, true, SLP); 272837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 272937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Now try to schedule the new bundle. As soon as the bundle is "ready" it 273037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // means that there are no cyclic dependencies and we can schedule it. 
273137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Note that's important that we don't "schedule" the bundle yet (see 273237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // cancelScheduling). 273337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (!Bundle->isReady() && !ReadyInsts.empty()) { 273437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 273537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *pickedSD = ReadyInsts.back(); 273637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.pop_back(); 273737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 273837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (pickedSD->isSchedulingEntity() && pickedSD->isReady()) { 273937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines schedule(pickedSD, ReadyInsts); 274037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 274137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 274237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return Bundle->isReady(); 274337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 274437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 274537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) { 274637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isa<PHINode>(VL[0])) 274737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 274837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 274937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *Bundle = getScheduleData(VL[0]); 275037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n"); 275137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!Bundle->IsScheduled && 275237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "Can't cancel bundle which is already scheduled"); 275337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 
assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() && 275437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "tried to unbundle something which is not a bundle"); 275537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 275637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Un-bundle: make single instructions out of the bundle. 275737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *BundleMember = Bundle; 275837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (BundleMember) { 275937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(BundleMember->FirstInBundle == Bundle && "corrupt bundle links"); 276037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->FirstInBundle = BundleMember; 276137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *Next = BundleMember->NextInBundle; 276237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->NextInBundle = nullptr; 276337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->UnscheduledDepsInBundle = BundleMember->UnscheduledDeps; 276437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (BundleMember->UnscheduledDepsInBundle == 0) { 276537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.insert(BundleMember); 276637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 276737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember = Next; 276837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 276937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 277037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 277137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) { 277237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (getScheduleData(V)) 277337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 277437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *I = dyn_cast<Instruction>(V); 
277537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(I && "bundle member must be an instruction"); 277637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled"); 277737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!ScheduleStart) { 277837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // It's the first instruction in the new region. 277937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines initScheduleData(I, I->getNextNode(), nullptr, nullptr); 278037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleStart = I; 278137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleEnd = I->getNextNode(); 278237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(ScheduleEnd && "tried to vectorize a TerminatorInst?"); 278337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n"); 278437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 278537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 278637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Search up and down at the same time, because we don't know if the new 278737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // instruction is above or below the existing scheduling region. 
278837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BasicBlock::reverse_iterator UpIter(ScheduleStart); 278937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BasicBlock::reverse_iterator UpperEnd = BB->rend(); 279037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BasicBlock::iterator DownIter(ScheduleEnd); 279137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BasicBlock::iterator LowerEnd = BB->end(); 279237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (;;) { 279337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (UpIter != UpperEnd) { 279437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (&*UpIter == I) { 279537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion); 279637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleStart = I; 279737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n"); 279837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 279937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 280037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines UpIter++; 280137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 280237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (DownIter != LowerEnd) { 280337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (&*DownIter == I) { 280437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion, 280537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines nullptr); 280637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleEnd = I->getNextNode(); 280737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(ScheduleEnd && "tried to vectorize a TerminatorInst?"); 280837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n"); 280937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen 
Hines return; 281037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 281137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DownIter++; 281237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 281337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert((UpIter != UpperEnd || DownIter != LowerEnd) && 281437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "instruction not found in block"); 281537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 281637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 281737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 281837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI, 281937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *ToI, 282037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *PrevLoadStore, 282137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *NextLoadStore) { 282237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *CurrentLoadStore = PrevLoadStore; 282337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) { 282437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = ScheduleDataMap[I]; 282537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!SD) { 282637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Allocate a new ScheduleData for the instruction. 
282737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ChunkPos >= ChunkSize) { 282837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleDataChunks.push_back( 282937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines llvm::make_unique<ScheduleData[]>(ChunkSize)); 283037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ChunkPos = 0; 283137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 283237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD = &(ScheduleDataChunks.back()[ChunkPos++]); 283337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleDataMap[I] = SD; 283437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->Inst = I; 283537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 283637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!isInSchedulingRegion(SD) && 283737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "new ScheduleData already in scheduling region"); 283837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->init(SchedulingRegionID); 283937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 284037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (I->mayReadOrWriteMemory()) { 284137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Update the linked list of memory accessing instructions. 
284237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (CurrentLoadStore) { 284337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines CurrentLoadStore->NextLoadStore = SD; 284437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 284537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines FirstLoadStoreInRegion = SD; 284637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 284737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines CurrentLoadStore = SD; 284837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 284937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 285037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (NextLoadStore) { 285137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (CurrentLoadStore) 285237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines CurrentLoadStore->NextLoadStore = NextLoadStore; 285337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 285437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LastLoadStoreInRegion = CurrentLoadStore; 285537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 285637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 285737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 285837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, 285937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool InsertInReadyList, 2860ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BoUpSLP *SLP) { 286137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(SD->isSchedulingEntity()); 286237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 286337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallVector<ScheduleData *, 10> WorkList; 286437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines WorkList.push_back(SD); 286537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 286637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (!WorkList.empty()) { 
286737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = WorkList.back(); 286837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines WorkList.pop_back(); 286937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 287037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *BundleMember = SD; 287137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (BundleMember) { 287237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(isInSchedulingRegion(BundleMember)); 287337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!BundleMember->hasValidDependencies()) { 287437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 287537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: update deps of " << *BundleMember << "\n"); 287637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->Dependencies = 0; 287737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->resetUnscheduledDeps(); 287837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 287937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Handle def-use chain dependencies. 
288037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (User *U : BundleMember->Inst->users()) { 288137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isa<Instruction>(U)) { 288237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *UseSD = getScheduleData(U); 288337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) { 288437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->Dependencies++; 288537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *DestBundle = UseSD->FirstInBundle; 288637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!DestBundle->IsScheduled) { 288737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->incrementUnscheduledDeps(1); 288837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 288937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!DestBundle->hasValidDependencies()) { 289037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines WorkList.push_back(DestBundle); 289137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 289237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 289337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 289437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // I'm not sure if this can ever happen. But we need to be safe. 289537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // This lets the instruction/bundle never be scheduled and eventally 289637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // disable vectorization. 
289737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->Dependencies++; 289837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->incrementUnscheduledDeps(1); 289937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 290037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 290137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 290237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Handle the memory dependencies. 290337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *DepDest = BundleMember->NextLoadStore; 290437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (DepDest) { 2905ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Instruction *SrcInst = BundleMember->Inst; 2906ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AliasAnalysis::Location SrcLoc = getLocation(SrcInst, SLP->AA); 290737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory(); 2908ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned numAliased = 0; 2909ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned DistToSrc = 1; 291037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 291137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (DepDest) { 291237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(isInSchedulingRegion(DepDest)); 2913ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 2914ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // We have two limits to reduce the complexity: 2915ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // 1) AliasedCheckLimit: It's a small limit to reduce calls to 2916ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // SLP->isAliased (which is the expensive part in this loop). 
2917ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // 2) MaxMemDepDistance: It's for very large blocks and it aborts 2918ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // the whole loop (even if the loop is fast, it's quadratic). 2919ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // It's important for the loop break condition (see below) to 2920ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // check this limit even between two read-only instructions. 2921ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (DistToSrc >= MaxMemDepDistance || 2922ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) && 2923ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (numAliased >= AliasedCheckLimit || 2924ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) { 2925ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 2926ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // We increment the counter only if the locations are aliased 2927ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // (instead of counting all alias checks). This gives a better 2928ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // balance between reduced runtime and accurate dependencies. 
2929ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines numAliased++; 2930ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 2931ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines DepDest->MemoryDependencies.push_back(BundleMember); 2932ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BundleMember->Dependencies++; 2933ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines ScheduleData *DestBundle = DepDest->FirstInBundle; 2934ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!DestBundle->IsScheduled) { 2935ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BundleMember->incrementUnscheduledDeps(1); 2936ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2937ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!DestBundle->hasValidDependencies()) { 2938ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines WorkList.push_back(DestBundle); 293937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 294037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 294137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DepDest = DepDest->NextLoadStore; 2942ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 2943ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Example, explaining the loop break condition: Let's assume our 2944ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // starting instruction is i0 and MaxMemDepDistance = 3. 2945ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // 2946ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // +--------v--v--v 2947ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // i0,i1,i2,i3,i4,i5,i6,i7,i8 2948ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // +--------^--^--^ 2949ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // 2950ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // MaxMemDepDistance let us stop alias-checking at i3 and we add 2951ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // dependencies from i0 to i3,i4,.. 
(even if they are not aliased). 2952ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Previously we already added dependencies from i3 to i6,i7,i8 2953ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // (because of MaxMemDepDistance). As we added a dependency from 2954ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8 2955ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // and we can abort this loop at i6. 2956ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (DistToSrc >= 2 * MaxMemDepDistance) 2957ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 2958ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines DistToSrc++; 295937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 296037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 296137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 296237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember = BundleMember->NextInBundle; 296337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 296437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (InsertInReadyList && SD->isReady()) { 296537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.push_back(SD); 296637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: gets ready on update: " << *SD->Inst << "\n"); 296737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 296837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 296937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 297037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 297137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::resetSchedule() { 297237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(ScheduleStart && 297337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "tried to reset schedule on block which has not been scheduled"); 297437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for 
(Instruction *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) { 297537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = getScheduleData(I); 297637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(isInSchedulingRegion(SD)); 297737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->IsScheduled = false; 297837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->resetUnscheduledDeps(); 297937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 298037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.clear(); 298137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 298237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 298337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::scheduleBlock(BlockScheduling *BS) { 298437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 298537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!BS->ScheduleStart) 298637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 298737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 298837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n"); 298937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 299037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->resetSchedule(); 299137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 299237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // For the real scheduling we use a more sophisticated ready-list: it is 299337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // sorted by the original instruction location. This lets the final schedule 299437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // be as close as possible to the original instruction order. 
299537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines struct ScheduleDataCompare { 299637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool operator()(ScheduleData *SD1, ScheduleData *SD2) { 299737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return SD2->SchedulingPriority < SD1->SchedulingPriority; 299837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 299937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines }; 300037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts; 300137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 300237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Ensure that all depencency data is updated and fill the ready-list with 300337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // initial instructions. 300437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Idx = 0; 300537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int NumToSchedule = 0; 300637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd; 300737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines I = I->getNextNode()) { 300837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = BS->getScheduleData(I); 300937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert( 301037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->isPartOfBundle() == (ScalarToTreeEntry.count(SD->Inst) != 0) && 301137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "scheduler and vectorizer have different opinion on what is a bundle"); 301237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->FirstInBundle->SchedulingPriority = Idx++; 301337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (SD->isSchedulingEntity()) { 3014ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BS->calculateDependencies(SD, false, this); 301537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NumToSchedule++; 
301637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 301737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 301837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->initialFillReadyList(ReadyInsts); 301937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 302037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *LastScheduledInst = BS->ScheduleEnd; 302137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 302237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Do the "real" scheduling. 302337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (!ReadyInsts.empty()) { 302437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *picked = *ReadyInsts.begin(); 302537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.erase(ReadyInsts.begin()); 302637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 302737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Move the scheduled instruction(s) to their dedicated places, if not 302837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // there yet. 
302937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *BundleMember = picked; 303037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (BundleMember) { 303137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *pickedInst = BundleMember->Inst; 303237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (LastScheduledInst->getNextNode() != pickedInst) { 303337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->BB->getInstList().remove(pickedInst); 303437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->BB->getInstList().insert(LastScheduledInst, pickedInst); 303537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 303637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LastScheduledInst = pickedInst; 303737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember = BundleMember->NextInBundle; 303837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 303937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 304037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->schedule(picked, ReadyInsts); 304137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NumToSchedule--; 304237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 304337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(NumToSchedule == 0 && "could not schedule all instructions"); 304437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 304537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Avoid duplicate scheduling of the block. 304637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->ScheduleStart = nullptr; 304737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 304837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 30498383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem/// The SLPVectorizer Pass. 
3050e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotemstruct SLPVectorizer : public FunctionPass { 305153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem typedef SmallVector<StoreInst *, 8> StoreList; 305253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem typedef MapVector<Value *, StoreList> StoreListMap; 30538383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 30548383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem /// Pass identification, replacement for typeid 30558383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem static char ID; 30568383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3057e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem explicit SLPVectorizer() : FunctionPass(ID) { 30588383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem initializeSLPVectorizerPass(*PassRegistry::getPassRegistry()); 30598383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem } 30608383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 30618383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem ScalarEvolution *SE; 30628383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem TargetTransformInfo *TTI; 3063dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines TargetLibraryInfo *TLI; 30648383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem AliasAnalysis *AA; 3065e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem LoopInfo *LI; 3066722b0a4d293b16eebaed94ae65d5f11743cbcea5Nadav Rotem DominatorTree *DT; 3067ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AssumptionCache *AC; 3068e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 306936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool runOnFunction(Function &F) override { 307036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (skipOptnoneFunction(F)) 307136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 307236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 3073e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem SE = &getAnalysis<ScalarEvolution>(); 3074ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 3075ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 3076ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines TLI = TLIP ? &TLIP->getTLI() : nullptr; 3077e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem AA = &getAnalysis<AliasAnalysis>(); 3078ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 307936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 3080ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 3081e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3082e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem StoreRefs.clear(); 3083e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem bool Changed = false; 3084e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3085d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton // If the target claims to have no vector registers don't attempt 3086d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton // vectorization. 3087d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton if (!TTI->getNumberOfRegisters(true)) 3088d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton return false; 3089d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton 30903202f6cdb9193fe5365462118f499f6e164a1738Nadav Rotem // Don't vectorize when the attribute NoImplicitFloat is used. 
3091551dac1f62026ef32ad294d8c1cc5b545b05935aMatt Arsenault if (F.hasFnAttribute(Attribute::NoImplicitFloat)) 30923202f6cdb9193fe5365462118f499f6e164a1738Nadav Rotem return false; 30933202f6cdb9193fe5365462118f499f6e164a1738Nadav Rotem 30940b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); 309509ec4b21648700f9d4ef5bc90d732f90f32c930cNadav Rotem 309636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Use the bottom up slp vectorizer to construct chains that start with 3097dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // store instructions. 30984c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC); 3099ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 3100ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to 3101ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // delete instructions. 310253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 310370d695801a2bd5eed4bd6ea05d40516e6e6fa276Nadav Rotem // Scan the blocks in the function in post order. 31040c7f116bb6950ef819323d855415b2f2b0aad987Pirama Arumuga Nainar for (auto BB : post_order(&F.getEntryBlock())) { 3105e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Vectorize trees that end at stores. 3106ef332b1ca1721be962c73e76b4c4e0e44ffaf5d9Nadav Rotem if (unsigned count = collectStores(BB, R)) { 3107d7e8cce287616c1cc4dcbab6a43328b01fbe7be4Nadav Rotem (void)count; 31080b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem DEBUG(dbgs() << "SLP: Found " << count << " stores to vectorize.\n"); 310953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Changed |= vectorizeStoreChains(R); 3110e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 31116611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem 31126611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem // Vectorize trees that end at reductions. 
31136611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem Changed |= vectorizeChainsInBlock(BB, R); 3114e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 3115e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3116e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (Changed) { 31176959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem R.optimizeGatherSequence(); 31180b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n"); 3119e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem DEBUG(verifyFunction(F)); 3120e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 3121e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return Changed; 3122e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 3123e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 312436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void getAnalysisUsage(AnalysisUsage &AU) const override { 3125e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem FunctionPass::getAnalysisUsage(AU); 3126ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AU.addRequired<AssumptionCacheTracker>(); 3127e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem AU.addRequired<ScalarEvolution>(); 3128e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem AU.addRequired<AliasAnalysis>(); 3129ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AU.addRequired<TargetTransformInfoWrapperPass>(); 3130ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AU.addRequired<LoopInfoWrapperPass>(); 313136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AU.addRequired<DominatorTreeWrapperPass>(); 3132ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AU.addPreserved<LoopInfoWrapperPass>(); 313336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AU.addPreserved<DominatorTreeWrapperPass>(); 3134d4a9ebc7341a1ed066fcdff8e7e4e9cbf1bc4368Nadav Rotem AU.setPreservesCFG(); 3135e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 3136e9a4411db4d3a05965630f668daf8071bf2d3513Nadav 
Rotem 3137e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotemprivate: 31388383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 31398383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem /// \brief Collect memory references and sort them according to their base 31408383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem /// object. We sort the stores to their base objects to reduce the cost of the 31418383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem /// quadratic search on the stores. TODO: We can further reduce this cost 31428383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem /// if we flush the chain creation every time we run into a memory barrier. 3143369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned collectStores(BasicBlock *BB, BoUpSLP &R); 31448383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3145e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem /// \brief Try to vectorize a chain that starts at two arithmetic instrs. 3146369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); 31478383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3148931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem /// \brief Try to vectorize a list of operands. 3149dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// \@param BuildVector A list of users to ignore for the purpose of 3150dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// scheduling and that don't need extracting. 3151d69d9f20bc3acee0fc233853745c1de015b541f2Nadav Rotem /// \returns true if a value was vectorized. 
3152dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, 315337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ArrayRef<Value *> BuildVector = None, 315437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool allowReorder = false); 3155444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem 3156e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem /// \brief Try to vectorize a chain that may start at the operands of \V; 3157369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); 31588383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3159e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem /// \brief Vectorize the stores that were collected in StoreRefs. 3160369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool vectorizeStoreChains(BoUpSLP &R); 3161d69d9f20bc3acee0fc233853745c1de015b541f2Nadav Rotem 31625cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem /// \brief Scan the basic block and look for patterns that are likely to start 31635cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem /// a vectorization chain. 
3164369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R); 3165369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3166369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold, 3167369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BoUpSLP &R); 3168e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3169369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold, 3170369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BoUpSLP &R); 3171e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotemprivate: 3172e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem StoreListMap StoreRefs; 3173e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem}; 3174e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 317536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// \brief Check that the Values in the slice in VL array are still existent in 3176dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// the WeakVH array. 3177dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// Vectorization of part of the VL array may cause later values in the VL array 3178dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// to become invalid. We track when this has happened in the WeakVH array. 
31794c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainarstatic bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH, 31804c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned SliceBegin, unsigned SliceSize) { 31814c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar VL = VL.slice(SliceBegin, SliceSize); 31824c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar VH = VH.slice(SliceBegin, SliceSize); 31834c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar return !std::equal(VL.begin(), VL.end(), VH.begin()); 3184dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling} 3185dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 3186369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain, 3187369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int CostThreshold, BoUpSLP &R) { 3188369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned ChainLen = Chain.size(); 3189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen 3190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem << "\n"); 3191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType(); 31924c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar auto &DL = cast<StoreInst>(Chain[0])->getModule()->getDataLayout(); 31934c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned Sz = DL.getTypeSizeInBits(StoreTy); 3194369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned VF = MinVecRegSize / Sz; 3195369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3196369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!isPowerOf2_32(Sz) || VF < 2) 3197369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return false; 3198369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 319936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Keep track of 
values that were deleted by vectorizing in the loop below. 3200dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end()); 3201dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 3202369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool Changed = false; 3203369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Look for profitable vectorizable trees at all offsets, starting at zero. 3204369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = ChainLen; i < e; ++i) { 3205369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (i + VF > e) 3206369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem break; 3207dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 3208dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling // Check that a previous iteration of this loop did not delete the Value. 3209dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling if (hasValueBeenRAUWed(Chain, TrackValues, i, VF)) 3210dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling continue; 3211dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 3212369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i 3213369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem << "\n"); 3214369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ArrayRef<Value *> Operands = Chain.slice(i, VF); 3215369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3216369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem R.buildTree(Operands); 3217369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3218369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int Cost = R.getTreeCost(); 3219369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3220369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); 3221369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Cost < CostThreshold) { 
3222369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); 3223369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem R.vectorizeTree(); 3224369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3225369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Move to the next bundle. 3226369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem i += VF - 1; 3227369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Changed = true; 3228369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 3229369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 3230369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 32318e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling return Changed; 3232369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 3233369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3234369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores, 323521508bf853354343266dbe6d830ff30bed006a68Nadav Rotem int costThreshold, BoUpSLP &R) { 32364c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar SetVector<StoreInst *> Heads, Tails; 32374c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain; 3238369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3239369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // We may run into multiple chains that merge into a single chain. We mark the 3240369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // stores that we vectorized so that we don't visit the same store twice. 
3241369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BoUpSLP::ValueSet VectorizedStores; 3242369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool Changed = false; 3243369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3244369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Do a quadratic search on all of the given stores and find 32456611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem // all of the pairs of stores that follow each other. 324621508bf853354343266dbe6d830ff30bed006a68Nadav Rotem for (unsigned i = 0, e = Stores.size(); i < e; ++i) { 3247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned j = 0; j < e; ++j) { 324868ccbf648ea544faad29115cdda929920739e154Nadav Rotem if (i == j) 3249369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem continue; 32504c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = Stores[i]->getModule()->getDataLayout(); 32514c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) { 3252369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Tails.insert(Stores[j]); 3253369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Heads.insert(Stores[i]); 3254369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ConsecutiveChain[Stores[i]] = Stores[j]; 3255369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 3256369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 325721508bf853354343266dbe6d830ff30bed006a68Nadav Rotem } 3258369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3259369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // For stores that start but don't end a link in the chain: 32604c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end(); 3261369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem it != e; ++it) { 3262369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Tails.count(*it)) 3263369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 
continue; 3264369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3265369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // We found a store instr that starts a chain. Now follow the chain and try 3266369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // to vectorize it. 3267369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BoUpSLP::ValueList Operands; 32684c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar StoreInst *I = *it; 3269369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Collect the chain into a list. 3270369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem while (Tails.count(I) || Heads.count(I)) { 3271369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (VectorizedStores.count(I)) 3272369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem break; 3273369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Operands.push_back(I); 3274369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Move to the next value in the chain. 3275369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem I = ConsecutiveChain[I]; 3276369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 3277369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3278369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool Vectorized = vectorizeStoreChain(Operands, costThreshold, R); 3279369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3280369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Mark the vectorized stores so that we don't vectorize them again. 
3281369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Vectorized) 3282369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorizedStores.insert(Operands.begin(), Operands.end()); 3283369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Changed |= Vectorized; 3284369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 3285369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3286369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Changed; 3287369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 3288369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3289369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3290369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemunsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { 3291ef332b1ca1721be962c73e76b4c4e0e44ffaf5d9Nadav Rotem unsigned count = 0; 3292e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem StoreRefs.clear(); 32934c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = BB->getModule()->getDataLayout(); 3294e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { 3295e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem StoreInst *SI = dyn_cast<StoreInst>(it); 3296e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (!SI) 3297e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem continue; 3298e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3299fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer // Don't touch volatile stores. 3300fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer if (!SI->isSimple()) 3301fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer continue; 3302fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer 3303e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Check that the pointer points to scalars. 
33044f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem Type *Ty = SI->getValueOperand()->getType(); 3305ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!isValidElementType(Ty)) 3306dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines continue; 3307e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 33086623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer // Find the base pointer. 33096623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL); 3310e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3311e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Save the store locations. 3312e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem StoreRefs[Ptr].push_back(SI); 3313ef332b1ca1721be962c73e76b4c4e0e44ffaf5d9Nadav Rotem count++; 33148383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem } 3315ef332b1ca1721be962c73e76b4c4e0e44ffaf5d9Nadav Rotem return count; 3316e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem} 3317e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3318369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { 33190b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem if (!A || !B) 33200b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem return false; 33216fe5cc49d88c9dd48a1eefe4c1bdba1567b8eef2Benjamin Kramer Value *VL[] = { A, B }; 332237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return tryToVectorizeList(VL, R, None, true); 3323444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem} 3324444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem 3325dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, 332637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ArrayRef<Value *> BuildVector, 332737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool allowReorder) { 33285cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem if (VL.size() < 2) 
33295cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem return false; 33305cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem 33310b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n"); 33324f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem 33335cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem // Check that all of the parts are scalar instructions of the same type. 33345cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem Instruction *I0 = dyn_cast<Instruction>(VL[0]); 33350b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem if (!I0) 333689008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang return false; 33375cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem 33385cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem unsigned Opcode0 = I0->getOpcode(); 33394c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = I0->getModule()->getDataLayout(); 33408e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 334189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang Type *Ty0 = I0->getType(); 33424c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned Sz = DL.getTypeSizeInBits(Ty0); 334389008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang unsigned VF = MinVecRegSize / Sz; 33445cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem 33454f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem for (int i = 0, e = VL.size(); i < e; ++i) { 33464f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem Type *Ty = VL[i]->getType(); 3347ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!isValidElementType(Ty)) 334889008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang return false; 33495cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem Instruction *Inst = dyn_cast<Instruction>(VL[i]); 33505cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem if (!Inst || Inst->getOpcode() != Opcode0) 335189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang return false; 
33524f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem } 33534f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem 335489008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang bool Changed = false; 33558e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 3356dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Keep track of values that were deleted by vectorizing in the loop below. 3357dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end()); 3358dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 335989008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang for (unsigned i = 0, e = VL.size(); i < e; ++i) { 336089008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang unsigned OpsWidth = 0; 33618e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 33628e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling if (i + VF > e) 336389008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang OpsWidth = e - i; 336489008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang else 336589008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang OpsWidth = VF; 336689008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang 336789008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) 336889008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang break; 3369d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem 3370dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling // Check that a previous iteration of this loop did not delete the Value. 
3371dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth)) 3372dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling continue; 3373dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 33748e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " 33758e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling << "\n"); 337689008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang ArrayRef<Value *> Ops = VL.slice(i, OpsWidth); 33778e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 3378dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ArrayRef<Value *> BuildVectorSlice; 3379dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!BuildVector.empty()) 3380dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BuildVectorSlice = BuildVector.slice(i, OpsWidth); 3381dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 3382dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines R.buildTree(Ops, BuildVectorSlice); 338337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // TODO: check if we can allow reordering also for other cases than 338437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // tryToVectorizePair() 338537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (allowReorder && R.shouldReorder()) { 338637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(Ops.size() == 2); 338737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(BuildVectorSlice.empty()); 338837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Value *ReorderedOps[] = { Ops[1], Ops[0] }; 338937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines R.buildTree(ReorderedOps, None); 339037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 339189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang int Cost = R.getTreeCost(); 33928e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 339389008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang if (Cost < -SLPCostThreshold) { 
339436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); 3395dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Value *VectorizedRoot = R.vectorizeTree(); 3396dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 3397dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Reconstruct the build vector by extracting the vectorized root. This 3398dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // way we handle the case where some elements of the vector are undefined. 3399dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) 3400dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!BuildVectorSlice.empty()) { 3401dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // The insert point is the last build vector instruction. The vectorized 3402dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // root will precede it. This guarantees that we get an instruction. The 3403dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // vectorized tree could have been constant folded. 
3404dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back()); 3405dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned VecIdx = 0; 3406dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (auto &V : BuildVectorSlice) { 3407dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines IRBuilder<true, NoFolder> Builder( 3408dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ++BasicBlock::iterator(InsertAfter)); 3409dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines InsertElementInst *IE = cast<InsertElementInst>(V); 3410dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement( 3411dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines VectorizedRoot, Builder.getInt32(VecIdx++))); 3412dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines IE->setOperand(1, Extract); 3413dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines IE->removeFromParent(); 3414dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines IE->insertAfter(Extract); 3415dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines InsertAfter = IE; 3416dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 3417dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 341889008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang // Move to the next bundle. 
341989008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang i += VF - 1; 342089008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang Changed = true; 342189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang } 342289008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang } 34238e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 34248e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling return Changed; 3425e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem} 34268383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3427369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { 34280b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem if (!V) 34290b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem return false; 343053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 3431e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Try to vectorize V. 3432e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R)) 3433f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem return true; 3434f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem 3435e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0)); 3436e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1)); 3437e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Try to skip B. 
3438e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (B && B->hasOneUse()) { 3439e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0)); 3440e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1)); 3441e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(A, B0, R)) { 3442ab105ae95fc473c19d9f0b019fc7c7a16d17b1a5Nadav Rotem return true; 3443f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem } 3444e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(A, B1, R)) { 3445e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return true; 3446f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem } 3447f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem } 3448f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem 34497fac0ef71cfaeafd91b9520b553d00d91f83a442Nadav Rotem // Try to skip A. 3450e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (A && A->hasOneUse()) { 3451e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0)); 3452e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1)); 3453e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(A0, B, R)) { 3454e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return true; 3455e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 3456e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(A1, B, R)) { 3457e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return true; 3458f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem } 3459f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem } 3460e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return 0; 3461e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem} 3462f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem 
3463a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \brief Generate a shuffle mask to be used in a reduction tree. 3464a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// 3465a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param VecLen The length of the vector to be reduced. 3466a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param NumEltsToRdx The number of elements that should be reduced in the 3467a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// vector. 3468a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param IsPairwise Whether the reduction is a pairwise or splitting 3469a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// reduction. A pairwise reduction will generate a mask of 3470a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// <0,2,...> or <1,3,..> while a splitting reduction will generate 3471a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// <2,3, undef,undef> for a vector of 4 and NumElts = 2. 3472a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param IsLeft True will generate a mask of even elements, odd otherwise. 
3473a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferstatic Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, 3474a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer bool IsPairwise, bool IsLeft, 3475a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer IRBuilder<> &Builder) { 3476a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask"); 3477a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3478a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer SmallVector<Constant *, 32> ShuffleMask( 3479a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer VecLen, UndefValue::get(Builder.getInt32Ty())); 3480a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3481a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (IsPairwise) 3482a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Build a mask of 0, 2, ... (left) or 1, 3, ... (right). 3483a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer for (unsigned i = 0; i != NumEltsToRdx; ++i) 3484a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft); 3485a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer else 3486a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Move the upper half of the vector to the lower half. 
3487a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer for (unsigned i = 0; i != NumEltsToRdx; ++i) 3488a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i); 3489a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3490a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return ConstantVector::get(ShuffleMask); 3491a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer} 3492a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3493a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3494a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// Model horizontal reductions. 3495a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// 3496a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// A horizontal reduction is a tree of reduction operations (currently add and 3497a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// fadd) that has operations that can be put into a vector as its leaf. 3498a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// For example, this tree: 3499a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// 3500a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// mul mul mul mul 3501a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \ / \ / 3502a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// + + 3503a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \ / 3504a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// + 3505a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// This tree has "mul" as its reduced values and "+" as its reduction 3506a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// operations. A reduction might be feeding into a store or a binary operation 3507a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// feeding a phi. 3508a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// ... 
3509a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \ / 3510a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// + 351138bf2d62b6be4b496e8f6d176578699b9c6e08bbArnold Schwaighofer/// | 3512a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// phi += 3513a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// 3514a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// Or: 3515a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// ... 3516a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \ / 3517a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// + 351838bf2d62b6be4b496e8f6d176578699b9c6e08bbArnold Schwaighofer/// | 3519a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// *p = 3520a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// 3521a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferclass HorizontalReduction { 3522dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVector<Value *, 16> ReductionOps; 3523a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer SmallVector<Value *, 32> ReducedVals; 3524a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3525a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer BinaryOperator *ReductionRoot; 3526a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer PHINode *ReductionPHI; 3527a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3528a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// The opcode of the reduction. 3529a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer unsigned ReductionOpcode; 3530a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// The opcode of the values we perform a reduction on. 3531a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer unsigned ReducedValueOpcode; 3532a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// The width of one full horizontal reduction operation. 
3533a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer unsigned ReduxWidth; 3534a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// Should we model this reduction as a pairwise reduction tree or a tree that 3535a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// splits the vector in halves and adds those halves. 3536a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer bool IsPairwiseReduction; 3537a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3538a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferpublic: 3539a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer HorizontalReduction() 3540dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0), 3541a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {} 3542a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3543a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// \brief Try to find a reduction tree. 35444c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) { 3545a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer assert((!Phi || 3546a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) && 3547a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer "Thi phi needs to use the binary operator"); 3548a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3549a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // We could have a initial reductions that is not an add. 3550a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // r *= v1 + v2 + v3 + v4 3551a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // In such a case start looking for a tree rooted in the first '+'. 
3552a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (Phi) { 3553a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (B->getOperand(0) == Phi) { 3554dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Phi = nullptr; 3555a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer B = dyn_cast<BinaryOperator>(B->getOperand(1)); 3556a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } else if (B->getOperand(1) == Phi) { 3557dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Phi = nullptr; 3558a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer B = dyn_cast<BinaryOperator>(B->getOperand(0)); 3559a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3560a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3561a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3562a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (!B) 3563a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3564a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3565a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Type *Ty = B->getType(); 3566ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!isValidElementType(Ty)) 3567a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3568a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 35694c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = B->getModule()->getDataLayout(); 3570a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionOpcode = B->getOpcode(); 3571a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReducedValueOpcode = 0; 35724c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty); 3573a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionRoot = B; 3574a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionPHI = Phi; 
3575a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3576a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (ReduxWidth < 4) 3577a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3578a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3579a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // We currently only support adds. 3580a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (ReductionOpcode != Instruction::Add && 3581a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionOpcode != Instruction::FAdd) 3582a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3583a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3584a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Post order traverse the reduction tree starting at B. We only handle true 3585a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // trees containing only binary operators. 3586a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer SmallVector<std::pair<BinaryOperator *, unsigned>, 32> Stack; 3587a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Stack.push_back(std::make_pair(B, 0)); 3588a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer while (!Stack.empty()) { 3589a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer BinaryOperator *TreeN = Stack.back().first; 3590a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer unsigned EdgeToVist = Stack.back().second++; 3591a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode; 3592a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3593a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Only handle trees in the current basic block. 
3594a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (TreeN->getParent() != B->getParent()) 3595a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3596a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3597a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Each tree node needs to have one user except for the ultimate 3598a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // reduction. 3599a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (!TreeN->hasOneUse() && TreeN != B) 3600a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3601a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3602a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Postorder vist. 3603a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (EdgeToVist == 2 || IsReducedValue) { 3604a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (IsReducedValue) { 3605a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Make sure that the opcodes of the operations that we are going to 3606a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // reduce match. 3607a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (!ReducedValueOpcode) 3608a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReducedValueOpcode = TreeN->getOpcode(); 3609a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer else if (ReducedValueOpcode != TreeN->getOpcode()) 3610a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3611a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReducedVals.push_back(TreeN); 3612a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } else { 3613a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // We need to be able to reassociate the adds. 
3614a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (!TreeN->isAssociative()) 3615a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3616dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ReductionOps.push_back(TreeN); 3617a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3618a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Retract. 3619a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Stack.pop_back(); 3620a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer continue; 3621a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3622a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3623a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Visit left or right. 3624a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *NextV = TreeN->getOperand(EdgeToVist); 3625a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer BinaryOperator *Next = dyn_cast<BinaryOperator>(NextV); 3626a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (Next) 3627a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Stack.push_back(std::make_pair(Next, 0)); 3628a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer else if (NextV != Phi) 3629a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3630a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3631a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return true; 3632a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3633a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3634a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// \brief Attempt to vectorize the tree found by 3635a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// matchAssociativeReduction. 
3636a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) { 3637a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (ReducedVals.empty()) 3638a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3639a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3640a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer unsigned NumReducedVals = ReducedVals.size(); 3641a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (NumReducedVals < ReduxWidth) 3642a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3643a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3644dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Value *VectorizedTree = nullptr; 3645a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer IRBuilder<> Builder(ReductionRoot); 3646a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer FastMathFlags Unsafe; 3647a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Unsafe.setUnsafeAlgebra(); 3648a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Builder.SetFastMathFlags(Unsafe); 3649a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer unsigned i = 0; 3650a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3651a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { 365237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines V.buildTree(makeArrayRef(&ReducedVals[i], ReduxWidth), ReductionOps); 3653a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3654a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Estimate cost. 
3655a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]); 3656a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (Cost >= -SLPCostThreshold) 3657a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer break; 3658a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3659a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost 3660a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer << ". (HorRdx)\n"); 3661a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3662a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Vectorize a tree. 3663a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc(); 3664a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *VectorizedRoot = V.vectorizeTree(); 3665a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3666a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Emit a reduction. 
3667a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder); 3668a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (VectorizedTree) { 3669a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Builder.SetCurrentDebugLocation(Loc); 3670a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree, 3671a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReducedSubTree, "bin.rdx"); 3672a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } else 3673a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer VectorizedTree = ReducedSubTree; 3674a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3675a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3676a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (VectorizedTree) { 3677a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Finish the reduction. 3678a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer for (; i < NumReducedVals; ++i) { 3679a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Builder.SetCurrentDebugLocation( 3680a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer cast<Instruction>(ReducedVals[i])->getDebugLoc()); 3681a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree, 3682a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReducedVals[i]); 3683a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3684a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Update users. 
3685a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (ReductionPHI) { 3686dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(ReductionRoot && "Need a reduction operation"); 3687a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionRoot->setOperand(0, VectorizedTree); 3688a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionRoot->setOperand(1, ReductionPHI); 3689a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } else 3690a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionRoot->replaceAllUsesWith(VectorizedTree); 3691a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3692dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return VectorizedTree != nullptr; 3693a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3694a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3695a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferprivate: 3696a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3697a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// \brief Calcuate the cost of a reduction. 
3698a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) { 3699a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Type *ScalarTy = FirstReducedVal->getType(); 3700a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Type *VecTy = VectorType::get(ScalarTy, ReduxWidth); 3701a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3702a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int PairwiseRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, true); 3703a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int SplittingRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, false); 3704a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3705a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost; 3706a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost; 3707a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3708a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int ScalarReduxCost = 3709a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy); 3710a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3711a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost 3712a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer << " for reduction that starts with " << *FirstReducedVal 3713a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer << " (It is a " 3714a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer << (IsPairwiseReduction ? 
"pairwise" : "splitting") 3715a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer << " reduction)\n"); 3716a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3717a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return VecReduxCost - ScalarReduxCost; 3718a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3719a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3720a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L, 3721a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *R, const Twine &Name = "") { 3722a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (Opcode == Instruction::FAdd) 3723a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return Builder.CreateFAdd(L, R, Name); 3724a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name); 3725a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3726a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3727a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// \brief Emit a horizontal reduction of the vectorized value. 
3728a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) { 3729a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer assert(VectorizedValue && "Need to have a vectorized tree node"); 3730a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer assert(isPowerOf2_32(ReduxWidth) && 3731a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer "We only handle power-of-two reductions for now"); 3732a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3733ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Value *TmpVec = VectorizedValue; 3734a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { 3735a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (IsPairwiseReduction) { 3736a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *LeftMask = 3737a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer createRdxShuffleMask(ReduxWidth, i, true, true, Builder); 3738a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *RightMask = 3739a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer createRdxShuffleMask(ReduxWidth, i, true, false, Builder); 3740a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3741a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *LeftShuf = Builder.CreateShuffleVector( 3742a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l"); 3743a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *RightShuf = Builder.CreateShuffleVector( 3744a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer TmpVec, UndefValue::get(TmpVec->getType()), (RightMask), 3745a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer "rdx.shuf.r"); 3746a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, 
RightShuf, 3747a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer "bin.rdx"); 3748a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } else { 3749a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *UpperHalf = 3750a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer createRdxShuffleMask(ReduxWidth, i, false, false, Builder); 3751a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *Shuf = Builder.CreateShuffleVector( 3752a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf"); 3753a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx"); 3754a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3755a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3756a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3757a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // The result is in the first element of the vector. 
3758a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); 3759a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3760a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer}; 3761a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 37621b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// \brief Recognize construction of vectors like 37631b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// %ra = insertelement <4 x float> undef, float %s0, i32 0 37641b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// %rb = insertelement <4 x float> %ra, float %s1, i32 1 37651b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// %rc = insertelement <4 x float> %rb, float %s2, i32 2 37661b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// %rd = insertelement <4 x float> %rc, float %s3, i32 3 37671b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// 37681b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// Returns true if it matches 37691b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// 3770dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesstatic bool findBuildVector(InsertElementInst *FirstInsertElem, 3771dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVectorImpl<Value *> &BuildVector, 3772dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVectorImpl<Value *> &BuildVectorOpds) { 3773dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!isa<UndefValue>(FirstInsertElem->getOperand(0))) 37741b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault return false; 37751b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 3776dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines InsertElementInst *IE = FirstInsertElem; 37771b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault while (true) { 3778dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BuildVector.push_back(IE); 3779dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 
BuildVectorOpds.push_back(IE->getOperand(1)); 37801b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 37811b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault if (IE->use_empty()) 37821b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault return false; 37831b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 378436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->user_back()); 37851b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault if (!NextUse) 37861b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault return true; 37871b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 37881b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault // If this isn't the final use, make sure the next insertelement is the only 37891b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault // use. It's OK if the final constructed vector is used multiple times 37901b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault if (!IE->hasOneUse()) 37911b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault return false; 37921b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 37931b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault IE = NextUse; 37941b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault } 37951b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 37961b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault return false; 37971b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault} 37981b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 379924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighoferstatic bool PhiTypeSorterFunc(Value *V, Value *V2) { 380024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer return V->getType() < V2->getType(); 380124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer} 380224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer 3803369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { 
3804e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem bool Changed = false; 3805931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem SmallVector<Value *, 4> Incoming; 380624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer SmallSet<Value *, 16> VisitedInstrs; 380724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer 380824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer bool HaveVectorizedPhiNodes = true; 380924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer while (HaveVectorizedPhiNodes) { 381024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer HaveVectorizedPhiNodes = false; 381124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer 381224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Collect the incoming values from the PHIs. 381324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer Incoming.clear(); 381424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie; 381524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer ++instr) { 381624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer PHINode *P = dyn_cast<PHINode>(instr); 381724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer if (!P) 381824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer break; 381916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 382024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer if (!VisitedInstrs.count(P)) 382124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer Incoming.push_back(P); 382224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer } 3823931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem 382424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Sort by type. 
382524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc); 3826931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem 382724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Try to vectorize elements base on their type. 382824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(), 382924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer E = Incoming.end(); 383024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer IncIt != E;) { 383124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer 383224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Look for the next elements with the same type. 383324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer SmallVector<Value *, 4>::iterator SameTypeIt = IncIt; 383424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer while (SameTypeIt != E && 383524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer (*SameTypeIt)->getType() == (*IncIt)->getType()) { 383624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer VisitedInstrs.insert(*SameTypeIt); 383724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer ++SameTypeIt; 383824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer } 383916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 384024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Try to vectorize them. 
384124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer unsigned NumElts = (SameTypeIt - IncIt); 384224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n"); 384337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) { 384424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Success start over because instructions might have been changed. 384524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer HaveVectorizedPhiNodes = true; 384616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer Changed = true; 384724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer break; 384816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer } 384916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 385036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Start over at the next instruction of a different type (or the end). 385124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer IncIt = SameTypeIt; 3852931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem } 3853931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem } 3854931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem 385516a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer VisitedInstrs.clear(); 385616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 385716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) { 385816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer // We may go through BB multiple times so skip the one we have checked. 
385937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!VisitedInstrs.insert(it).second) 386016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer continue; 386116a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 386216a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer if (isa<DbgInfoIntrinsic>(it)) 38630b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem continue; 3864e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3865e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Try to vectorize reductions that use PHINodes. 386616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer if (PHINode *P = dyn_cast<PHINode>(it)) { 3867e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Check that the PHI is a reduction PHI. 38680b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem if (P->getNumIncomingValues() != 2) 38690b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem return Changed; 38700b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem Value *Rdx = 38710b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem (P->getIncomingBlock(0) == BB 38720b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem ? (P->getIncomingValue(0)) 3873dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) 3874dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines : nullptr)); 3875e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Check if this is a Binary Operator. 3876e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx); 3877e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (!BI) 38788383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem continue; 3879196ee11f85ce0148d2c2e33fbe1f1171ac5a8828Nadav Rotem 3880a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Try to match and vectorize a horizontal reduction. 
3881a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer HorizontalReduction HorRdx; 38824c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) && 3883a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer HorRdx.tryToReduce(R, TTI)) { 3884a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Changed = true; 3885a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer it = BB->begin(); 3886a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer e = BB->end(); 3887a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer continue; 3888a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3889a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3890a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *Inst = BI->getOperand(0); 38910b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem if (Inst == P) 38920b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem Inst = BI->getOperand(1); 389353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 389416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) { 389516a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer // We would like to start over since some instructions are deleted 389616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer // and the iterator may become invalid value. 
389716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer Changed = true; 389816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer it = BB->begin(); 389916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer e = BB->end(); 3900a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer continue; 390116a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer } 3902a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3903e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem continue; 3904e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 3905196ee11f85ce0148d2c2e33fbe1f1171ac5a8828Nadav Rotem 3906a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Try to vectorize horizontal reductions feeding into a store. 39079660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer if (ShouldStartVectorizeHorAtStore) 39089660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer if (StoreInst *SI = dyn_cast<StoreInst>(it)) 39099660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer if (BinaryOperator *BinOp = 39109660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer dyn_cast<BinaryOperator>(SI->getValueOperand())) { 39119660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer HorizontalReduction HorRdx; 39124c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) && 39139660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer HorRdx.tryToReduce(R, TTI)) || 39149660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer tryToVectorize(BinOp, R))) { 39159660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer Changed = true; 39169660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer it = BB->begin(); 39179660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer e = BB->end(); 39189660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer continue; 39199660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer } 
3920a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3921a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 392237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Try to vectorize horizontal reductions feeding into a return. 392337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) 392437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (RI->getNumOperands() != 0) 392537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (BinaryOperator *BinOp = 392637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines dyn_cast<BinaryOperator>(RI->getOperand(0))) { 392737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: Found a return to vectorize.\n"); 392837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (tryToVectorizePair(BinOp->getOperand(0), 392937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BinOp->getOperand(1), R)) { 393037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Changed = true; 393137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines it = BB->begin(); 393237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines e = BB->end(); 393337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 393437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 393537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 393637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 3937e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Try to vectorize trees that start at compare instructions. 
393816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer if (CmpInst *CI = dyn_cast<CmpInst>(it)) { 3939e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { 394016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer Changed = true; 394116a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer // We would like to start over since some instructions are deleted 394216a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer // and the iterator may become invalid value. 394316a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer it = BB->begin(); 394416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer e = BB->end(); 3945e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem continue; 3946e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 394716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 394816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer for (int i = 0; i < 2; ++i) { 394937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) { 395037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) { 395137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Changed = true; 395237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // We would like to start over since some instructions are deleted 395337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // and the iterator may become invalid value. 
395437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines it = BB->begin(); 395537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines e = BB->end(); 3956ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 395737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 395837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 395916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer } 3960e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem continue; 39618383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem } 39621b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 39631b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault // Try to vectorize trees that start at insertelement instructions. 3964dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) { 3965dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVector<Value *, 16> BuildVector; 3966dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVector<Value *, 16> BuildVectorOpds; 3967dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds)) 39681b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault continue; 39691b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 3970dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Vectorize starting with the build vector operands ignoring the 3971dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // BuildVector instructions for the purpose of scheduling and user 3972dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // extraction. 
3973dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) { 39741b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault Changed = true; 39751b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault it = BB->begin(); 39761b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault e = BB->end(); 39771b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault } 39781b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 39791b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault continue; 39801b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault } 39818383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem } 39828383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3983e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return Changed; 3984e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem} 39858383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3986369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotembool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { 3987e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem bool Changed = false; 3988e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Attempt to sort and vectorize each of the store-groups. 3989e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end(); 3990e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem it != e; ++it) { 3991e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (it->second.size() < 2) 3992e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem continue; 3993f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem 39940b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem DEBUG(dbgs() << "SLP: Analyzing a store chain of length " 399521508bf853354343266dbe6d830ff30bed006a68Nadav Rotem << it->second.size() << ".\n"); 39968383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 399721508bf853354343266dbe6d830ff30bed006a68Nadav Rotem // Process the stores in chunks of 16. 
399821508bf853354343266dbe6d830ff30bed006a68Nadav Rotem for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) { 399921508bf853354343266dbe6d830ff30bed006a68Nadav Rotem unsigned Len = std::min<unsigned>(CE - CI, 16); 400037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len), 400137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines -SLPCostThreshold, R); 400221508bf853354343266dbe6d830ff30bed006a68Nadav Rotem } 40038383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem } 4004e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return Changed; 4005e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem} 40068383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 40078383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem} // end anonymous namespace 40088383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 40098383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemchar SLPVectorizer::ID = 0; 40108383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemstatic const char lv_name[] = "SLP Vectorizer"; 40118383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false) 40128383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 4013ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesINITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 4014ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesINITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 40158383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 40168383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_DEPENDENCY(LoopSimplify) 40178383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) 40188383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 40198383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemnamespace llvm { 40200b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav 
RotemPass *createSLPVectorizerPass() { return new SLPVectorizer(); } 40218383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem} 4022