18383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===// 28383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// 38383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// The LLVM Compiler Infrastructure 48383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// 58383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// This file is distributed under the University of Illinois Open Source 68383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// License. See LICENSE.TXT for details. 78383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// 88383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===----------------------------------------------------------------------===// 98383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// This pass implements the Bottom Up SLP vectorizer. It detects consecutive 108383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// stores that can be put together into vector-stores. Next, it attempts to 118383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// construct vectorizable tree using the use-def chains. If a profitable tree 128383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// was found, the SLP vectorizer performs vectorization on the tree. 138383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// 148383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// The pass is inspired by the work described in the paper: 158383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. 168383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem// 178383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem//===----------------------------------------------------------------------===// 18de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/Transforms/Vectorize/SLPVectorizer.h" 194c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar#include "llvm/ADT/Optional.h" 206959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem#include "llvm/ADT/PostOrderIterator.h" 2153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include "llvm/ADT/SetVector.h" 2237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/ADT/Statistic.h" 2337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Analysis/CodeMetrics.h" 24de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/Analysis/GlobalsModRef.h" 25de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/Analysis/LoopAccessAnalysis.h" 2653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include "llvm/Analysis/ScalarEvolutionExpressions.h" 276623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer#include "llvm/Analysis/ValueTracking.h" 28de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/Analysis/VectorUtils.h" 298383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/DataLayout.h" 3036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Dominators.h" 3136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/IRBuilder.h" 328383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Instructions.h" 33f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem#include "llvm/IR/IntrinsicInst.h" 348383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Module.h" 35dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/NoFolder.h" 368383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Type.h" 378383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/IR/Value.h" 3836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Verifier.h" 398383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Pass.h" 408383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/CommandLine.h" 418383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/Debug.h" 428383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem#include "llvm/Support/raw_ostream.h" 43de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/Transforms/Vectorize.h" 4453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem#include <algorithm> 4537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include <memory> 468383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 478383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemusing namespace llvm; 48de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarusing namespace slpvectorizer; 498383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 50dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define SV_NAME "slp-vectorizer" 51dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "SLP" 52dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 5337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen HinesSTATISTIC(NumVectorInstructions, "Number of vector instructions generated"); 5437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 558383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemstatic cl::opt<int> 560b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, 5708e20fbea18922e9f5f1cfb0ea7931c90006100eNadav Rotem cl::desc("Only vectorize if you gain more than this " 5808e20fbea18922e9f5f1cfb0ea7931c90006100eNadav Rotem "number ")); 59a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 60a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferstatic cl::opt<bool> 61f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden, 62a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer cl::desc("Attempt to vectorize horizontal reductions")); 63a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 649660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighoferstatic cl::opt<bool> ShouldStartVectorizeHorAtStore( 659660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer "slp-vectorize-hor-store", cl::init(false), cl::Hidden, 669660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer cl::desc( 679660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer "Attempt to vectorize horizontal reductions feeding into a store")); 689660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer 69f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic cl::opt<int> 70f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarMaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden, 71f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar cl::desc("Attempt to vectorize for this register size in bits")); 72f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 73f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Limits the size of scheduling regions in a block. 74f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// It avoid long compile times for _very_ large blocks where vector 75f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// instructions are spread over a wide range. 76f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// This limit is way higher than needed by real-world functions. 77f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic cl::opt<int> 78f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden, 79f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar cl::desc("Limit the size of the SLP scheduling region per block")); 80f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 81de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic cl::opt<int> MinVectorRegSizeOption( 82de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar "slp-min-reg-size", cl::init(128), cl::Hidden, 83de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar cl::desc("Attempt to vectorize for this register size in bits")); 848383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 85f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// FIXME: Set this via cl::opt to allow overriding. 8625961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotemstatic const unsigned RecursionMaxDepth = 12; 8753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 88ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Limit the number of alias checks. The limit is chosen so that 89ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// it has no negative effect on the llvm benchmarks. 90ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic const unsigned AliasedCheckLimit = 10; 91ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 92ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Another limit for the alias checks: The maximum distance between load/store 93ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// instructions where alias checks are done. 94ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// This limit is useful for very large basic blocks. 95ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic const unsigned MaxMemDepDistance = 160; 96ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 97f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling 98f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// regions to be handled. 99f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic const int MinScheduleRegionSize = 16; 100f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 101ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \brief Predicate for the element types that the SLP vectorizer supports. 102ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// 103ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// The most important thing to filter here are types which are invalid in LLVM 104ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// vectors. We also filter target specific types which have absolutely no 105ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// meaningful vectorization path such as x86_fp80 and ppc_f128. This just 106ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// avoids spending time checking the cost model and realizing that they will 107ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// be inevitably scalarized. 108ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic bool isValidElementType(Type *Ty) { 109ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() && 110ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines !Ty->isPPC_FP128Ty(); 111ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 112ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 113369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns the parent basic block if all of the instructions in \p VL 114369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// are in the same block or null otherwise. 115369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic BasicBlock *getSameBlock(ArrayRef<Value *> VL) { 116369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Instruction *I0 = dyn_cast<Instruction>(VL[0]); 117369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!I0) 118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 119369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BasicBlock *BB = I0->getParent(); 120369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 1, e = VL.size(); i < e; i++) { 121369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Instruction *I = dyn_cast<Instruction>(VL[i]); 122369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!I) 123dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 124369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 125369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (BB != I->getParent()) 126dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 127369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 128369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return BB; 129369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 130369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 131369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns True if all of the values in \p VL are constants. 132369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic bool allConstant(ArrayRef<Value *> VL) { 133de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *i : VL) 134de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!isa<Constant>(i)) 135369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return false; 136369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return true; 137369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 138369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 139369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns True if all of the values in \p VL are identical. 140369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic bool isSplat(ArrayRef<Value *> VL) { 141369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 1, e = VL.size(); i < e; ++i) 142369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (VL[i] != VL[0]) 143369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return false; 144369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return true; 145369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 146369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 147c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines///\returns Opcode that can be clubbed with \p Op to create an alternate 148c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// sequence which can later be merged as a ShuffleVector instruction. 149c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic unsigned getAltOpcode(unsigned Op) { 150c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines switch (Op) { 151c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::FAdd: 152c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Instruction::FSub; 153c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::FSub: 154c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Instruction::FAdd; 155c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::Add: 156c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Instruction::Sub; 157c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::Sub: 158c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Instruction::Add; 159c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines default: 160c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return 0; 161c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 162c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines} 163c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 164c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines///\returns bool representing if Opcode \p Op can be part 165c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// of an alternate sequence which can later be merged as 166c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// a ShuffleVector instruction. 167c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic bool canCombineAsAltInst(unsigned Op) { 168f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return Op == Instruction::FAdd || Op == Instruction::FSub || 169f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Op == Instruction::Sub || Op == Instruction::Add; 170c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines} 171c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 172f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \returns ShuffleVector instruction if instructions in \p VL have 173c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence. 174c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines/// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...) 175c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic unsigned isAltInst(ArrayRef<Value *> VL) { 176c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *I0 = dyn_cast<Instruction>(VL[0]); 177c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned Opcode = I0->getOpcode(); 178c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned AltOpcode = getAltOpcode(Opcode); 179c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (int i = 1, e = VL.size(); i < e; i++) { 180c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *I = dyn_cast<Instruction>(VL[i]); 181c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode)) 182c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return 0; 183c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 184c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Instruction::ShuffleVector; 185c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines} 186c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 187369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns The opcode if all of the Instructions in \p VL have the same 188369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// opcode, or zero. 189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic unsigned getSameOpcode(ArrayRef<Value *> VL) { 190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Instruction *I0 = dyn_cast<Instruction>(VL[0]); 191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!I0) 192369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return 0; 193369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned Opcode = I0->getOpcode(); 194369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 1, e = VL.size(); i < e; i++) { 195369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Instruction *I = dyn_cast<Instruction>(VL[i]); 196c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (!I || Opcode != I->getOpcode()) { 197c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (canCombineAsAltInst(Opcode) && i == 1) 198c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return isAltInst(VL); 199369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return 0; 200c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 201369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 202369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Opcode; 203369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 204369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 20537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Get the intersection (logical and) of all of the potential IR flags 20637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// of each scalar operation (VL) that will be converted into a vector (I). 20737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// Flag set: NSW, NUW, exact, and all of fast-math. 20837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic void propagateIRFlags(Value *I, ArrayRef<Value *> VL) { 20937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (auto *VecOp = dyn_cast<BinaryOperator>(I)) { 21037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (auto *Intersection = dyn_cast<BinaryOperator>(VL[0])) { 21137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Intersection is initialized to the 0th scalar, 21237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // so start counting from index '1'. 21337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (int i = 1, e = VL.size(); i < e; ++i) { 21437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (auto *Scalar = dyn_cast<BinaryOperator>(VL[i])) 21537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Intersection->andIRFlags(Scalar); 21637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 21737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines VecOp->copyIRFlags(Intersection); 21837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 21937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 22037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 221fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling 222369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// \returns The type that all of the values in \p VL have or null if there 223369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// are different types. 224369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemstatic Type* getSameType(ArrayRef<Value *> VL) { 225369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *Ty = VL[0]->getType(); 226369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 1, e = VL.size(); i < e; i++) 22730bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem if (VL[i]->getType() != Ty) 228dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 229369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 230369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Ty; 231369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 232369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 233de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// \returns True if Extract{Value,Element} instruction extracts element Idx. 234de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic bool matchExtractIndex(Instruction *E, unsigned Idx, unsigned Opcode) { 235de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(Opcode == Instruction::ExtractElement || 236de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Opcode == Instruction::ExtractValue); 237de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Opcode == Instruction::ExtractElement) { 238369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ConstantInt *CI = dyn_cast<ConstantInt>(E->getOperand(1)); 239de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return CI && CI->getZExtValue() == Idx; 240de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } else { 241de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ExtractValueInst *EI = cast<ExtractValueInst>(E); 242de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return EI->getNumIndices() == 1 && *EI->idx_begin() == Idx; 243369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 244369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 245369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 24637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// \returns True if in-tree use also needs extract. This refers to 24737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// possible scalar operand in vectorized instruction. 24837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, 24937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TargetLibraryInfo *TLI) { 25037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 25137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Opcode = UserInst->getOpcode(); 25237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines switch (Opcode) { 25337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case Instruction::Load: { 25437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LoadInst *LI = cast<LoadInst>(UserInst); 25537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return (LI->getPointerOperand() == Scalar); 25637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 25737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case Instruction::Store: { 25837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines StoreInst *SI = cast<StoreInst>(UserInst); 25937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return (SI->getPointerOperand() == Scalar); 26037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 26137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case Instruction::Call: { 26237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines CallInst *CI = cast<CallInst>(UserInst); 263de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); 26437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (hasVectorInstrinsicScalarOpd(ID, 1)) { 26537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return (CI->getArgOperand(1) == Scalar); 26637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 26737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 26837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines default: 26937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 27037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 27137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 27237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 273ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \returns the AA location that is being access by the instruction. 274f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic MemoryLocation getLocation(Instruction *I, AliasAnalysis *AA) { 275ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (StoreInst *SI = dyn_cast<StoreInst>(I)) 2766948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return MemoryLocation::get(SI); 277ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *LI = dyn_cast<LoadInst>(I)) 2786948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return MemoryLocation::get(LI); 279f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return MemoryLocation(); 280ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 281ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 282ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines/// \returns True if the instruction is not a volatile or atomic load/store. 283ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic bool isSimple(Instruction *I) { 284ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *LI = dyn_cast<LoadInst>(I)) 285ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return LI->isSimple(); 286ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (StoreInst *SI = dyn_cast<StoreInst>(I)) 287ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return SI->isSimple(); 288ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) 289ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return !MI->isVolatile(); 290ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return true; 291ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 292ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 293de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarnamespace llvm { 294de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarnamespace slpvectorizer { 295369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem/// Bottom Up SLP Vectorizer. 296369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemclass BoUpSLP { 297369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotempublic: 29853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem typedef SmallVector<Value *, 8> ValueList; 29953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem typedef SmallVector<Instruction *, 16> InstrList; 30053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem typedef SmallPtrSet<Value *, 16> ValueSet; 30153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem typedef SmallVector<StoreInst *, 8> StoreList; 30253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 3034c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, 3044c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li, 305de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB, 306de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const DataLayout *DL) 307ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func), 308de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), DB(DB), 309de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DL(DL), Builder(Se->getContext()) { 310ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines CodeMetrics::collectEphemeralValues(F, AC, EphValues); 311de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Use the vector register size specified by the target unless overridden 312de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // by a command-line option. 313de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // TODO: It would be better to limit the vectorization factor based on 314de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // data type rather than just register size. For example, x86 AVX has 315de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // 256-bit registers, but it does not support integer operations 316de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // at that width (that requires AVX2). 317de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MaxVectorRegSizeOption.getNumOccurrences()) 318de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MaxVecRegSize = MaxVectorRegSizeOption; 319de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar else 320de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MaxVecRegSize = TTI->getRegisterBitWidth(true); 321de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 322de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MinVecRegSize = MinVectorRegSizeOption; 32337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 32453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 32553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// \brief Vectorize the tree that starts with the elements in \p VL. 326a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// Returns the vectorized root. 327a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *vectorizeTree(); 32853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 32937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// \returns the cost incurred by unwanted spills and fills, caused by 33037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// holding live values over call sites. 33137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int getSpillCost(); 33237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 33353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// \returns the vectorization cost of the subtree that starts at \p VL. 33453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// A negative number means that this is profitable. 335369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int getTreeCost(); 336369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 337dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// Construct a vectorizable tree that starts at \p Roots, ignoring users for 338dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// the purpose of scheduling and extraction in the \p UserIgnoreLst. 339dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void buildTree(ArrayRef<Value *> Roots, 340dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ArrayRef<Value *> UserIgnoreLst = None); 341369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 342369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// Clear the internal data structures that are created by 'buildTree'. 343369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem void deleteTree() { 344369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorizableTree.clear(); 345369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ScalarToTreeEntry.clear(); 346369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem MustGather.clear(); 347a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem ExternalUses.clear(); 34837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NumLoadsWantToKeepOrder = 0; 34937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NumLoadsWantToChangeOrder = 0; 35037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto &Iter : BlocksSchedules) { 35137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BlockScheduling *BS = Iter.second.get(); 35237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->clear(); 35337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 354de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MinBWs.clear(); 355369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 35653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 357369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// \brief Perform LICM and CSE on the newly generated gather sequences. 358369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem void optimizeGatherSequence(); 359c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 360f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar /// \returns true if it is beneficial to reverse the vector order. 36137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool shouldReorder() const { 36237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return NumLoadsWantToChangeOrder > NumLoadsWantToKeepOrder; 36337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 36437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 365de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// \return The vector element size in bits to use when vectorizing the 366de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// expression tree ending at \p V. If V is a store, the size is the width of 367de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// the stored value. Otherwise, the size is the width of the largest loaded 368de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// value reaching V. This method is used by the vectorizer to calculate 369de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// vectorization factors. 370de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned getVectorElementSize(Value *V); 371de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 372de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// Compute the minimum type sizes required to represent the entries in a 373de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// vectorizable tree. 374de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar void computeMinimumValueSizes(); 375de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 376de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // \returns maximum vector register size as set by TTI or overridden by cl::opt. 377de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned getMaxVecRegSize() const { 378de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return MaxVecRegSize; 379de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 380de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 381de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // \returns minimum vector register size as set by cl::opt. 382de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned getMinVecRegSize() const { 383de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return MinVecRegSize; 384de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 385de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 386de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// \brief Check if ArrayType or StructType is isomorphic to some VectorType. 387de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// 388de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// \returns number of elements in vector if isomorphism exists, 0 otherwise. 389de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned canMapToVector(Type *T, const DataLayout &DL) const; 390de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 391369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemprivate: 392369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem struct TreeEntry; 39353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 394369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// \returns the cost of the vectorizable entry. 395369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int getEntryCost(TreeEntry *E); 39653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 397369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// This is the recursive part of buildTree. 398369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth); 39953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 400de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// \returns True if the ExtractElement/ExtractValue instructions in VL can 401de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// be vectorized to use the original vector (or aggregate "bitcast" to a vector). 402de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool canReuseExtract(ArrayRef<Value *> VL, unsigned Opcode) const; 403de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 40462657090de3a5731bf644437701ccd78c247119fNadav Rotem /// Vectorize a single entry in the tree. 405369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *vectorizeTree(TreeEntry *E); 406369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 40762657090de3a5731bf644437701ccd78c247119fNadav Rotem /// Vectorize a single entry in the tree, starting in \p VL. 408369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *vectorizeTree(ArrayRef<Value *> VL); 40953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 41062657090de3a5731bf644437701ccd78c247119fNadav Rotem /// \returns the pointer to the vectorized value if \p VL is already 41162657090de3a5731bf644437701ccd78c247119fNadav Rotem /// vectorized, or NULL. They may happen in cycles. 4126a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault Value *alreadyVectorized(ArrayRef<Value *> VL) const; 41362657090de3a5731bf644437701ccd78c247119fNadav Rotem 41453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// \returns the scalarization cost for this type. Scalarization in this 41553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// context means the creation of vectors from a group of scalars. 41653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem int getGatherCost(Type *Ty); 41753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 418d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem /// \returns the scalarization cost for this list of values. Assuming that 419d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem /// this subtree gets vectorized, we may need to extract the values from the 420d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem /// roots. This method calculates the cost of extracting the values. 421d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem int getGatherCost(ArrayRef<Value *> VL); 422d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem 4234b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault /// \brief Set the Builder insert point to one after the last instruction in 4244b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault /// the bundle 4254b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault void setInsertPointAfterBundle(ArrayRef<Value *> VL); 4264b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault 42753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// \returns a vector from a collection of scalars in \p VL. 42853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Value *Gather(ArrayRef<Value *> VL, VectorType *Ty); 42953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 430f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar /// \returns whether the VectorizableTree is fully vectorizable and will 431d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang /// be beneficial even the tree height is tiny. 43236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool isFullyVectorizableTinyTree(); 433d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang 434ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// \reorder commutative operands in alt shuffle if they result in 435ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// vectorized code. 436ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines void reorderAltShuffleOperands(ArrayRef<Value *> VL, 437ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Left, 438ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Right); 439ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// \reorder commutative operands to get better probability of 440ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// generating vectorized code. 441ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL, 442ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Left, 443ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Right); 444369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem struct TreeEntry { 44537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TreeEntry() : Scalars(), VectorizedValue(nullptr), 446369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem NeedToGather(0) {} 44753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 448369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// \returns true if the scalars in VL are equal to this entry. 4496a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault bool isSame(ArrayRef<Value *> VL) const { 450369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem assert(VL.size() == Scalars.size() && "Invalid size"); 4516623d050c6f4351293bc1849e49bc0e37ec04596Benjamin Kramer return std::equal(VL.begin(), VL.end(), Scalars.begin()); 452369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 453369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 454369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// A vector of scalars. 455369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Scalars; 456369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 457369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// The Scalars are vectorized into this value. It is initialized to Null. 458369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *VectorizedValue; 459369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 460369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// Do we need to gather this sequence ? 461369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool NeedToGather; 462369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem }; 46353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 464369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// Create a new VectorizableTree entry. 465369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized) { 4666948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar VectorizableTree.emplace_back(); 467369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int idx = VectorizableTree.size() - 1; 468369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TreeEntry *Last = &VectorizableTree[idx]; 469369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end()); 470369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Last->NeedToGather = !Vectorized; 471369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Vectorized) { 472369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int i = 0, e = VL.size(); i != e; ++i) { 473369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem assert(!ScalarToTreeEntry.count(VL[i]) && "Scalar already in tree!"); 474369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ScalarToTreeEntry[VL[i]] = idx; 475369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 476369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } else { 477369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem MustGather.insert(VL.begin(), VL.end()); 478369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 479369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Last; 480ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem } 481de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 48253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// -- Vectorization State -- 483369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// Holds all of the tree entries. 484369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem std::vector<TreeEntry> VectorizableTree; 48553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 486369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// Maps a specific scalar to its tree entry. 487369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem SmallDenseMap<Value*, int> ScalarToTreeEntry; 48853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 489369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem /// A list of scalars that we found that we need to keep as scalars. 49053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem ValueSet MustGather; 49153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 492a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem /// This POD struct describes one external user in the vectorized tree. 493a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem struct ExternalUser { 494a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem ExternalUser (Value *S, llvm::User *U, int L) : 495f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Scalar(S), User(U), Lane(L){} 496a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Which scalar in our function. 497a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem Value *Scalar; 498a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Which user that uses the scalar. 499a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem llvm::User *User; 500a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Which lane does the scalar belong to. 501a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem int Lane; 502a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem }; 503a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem typedef SmallVector<ExternalUser, 16> UserList; 504a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 505ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// Checks if two instructions may access the same memory. 506ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// 507ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// \p Loc1 is the location of \p Inst1. It is passed explicitly because it 508ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// is invariant in the calling loop. 509f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1, 510ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Instruction *Inst2) { 511ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 512ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // First check if the result is already in the cache. 513ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AliasCacheKey key = std::make_pair(Inst1, Inst2); 514ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Optional<bool> &result = AliasCache[key]; 515ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (result.hasValue()) { 516ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return result.getValue(); 517ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 518f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemoryLocation Loc2 = getLocation(Inst2, AA); 519ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool aliased = true; 520ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (Loc1.Ptr && Loc2.Ptr && isSimple(Inst1) && isSimple(Inst2)) { 521ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Do the alias check. 522ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines aliased = AA->alias(Loc1, Loc2); 523ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 524ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Store the result in the cache. 525ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines result = aliased; 526ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return aliased; 527ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 528ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 529ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines typedef std::pair<Instruction *, Instruction *> AliasCacheKey; 530ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 531ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// Cache for alias results. 532ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// TODO: consider moving this to the AliasAnalysis itself. 533ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines DenseMap<AliasCacheKey, Optional<bool>> AliasCache; 534ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 535ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// Removes an instruction from its block and eventually deletes it. 536ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// It's like Instruction::eraseFromParent() except that the actual deletion 537ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// is delayed until BoUpSLP is destructed. 538ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// This is required to ensure that there are no incorrect collisions in the 539ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// AliasCache, which can happen if a new instruction is allocated at the 540ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// same address as a previously deleted instruction. 541ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines void eraseInstruction(Instruction *I) { 542ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines I->removeFromParent(); 543ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines I->dropAllReferences(); 544ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines DeletedInstructions.push_back(std::unique_ptr<Instruction>(I)); 545ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 546ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 547ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// Temporary store for deleted instructions. Instructions will be deleted 548ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines /// eventually when the BoUpSLP is destructed. 549ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVector<std::unique_ptr<Instruction>, 8> DeletedInstructions; 550ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 551a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem /// A list of values that need to extracted out of the tree. 552a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem /// This list holds pairs of (Internal Scalar : External User). 553a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem UserList ExternalUses; 554a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 55537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Values used only by @llvm.assume calls. 55637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallPtrSet<const Value *, 32> EphValues; 55753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 55853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// Holds all of the instructions that we gathered. 55953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem SetVector<Instruction *> GatherSeq; 560a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling /// A list of blocks that we are going to CSE. 56136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SetVector<BasicBlock *> CSEBlocks; 56253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 56337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Contains all scheduling relevant data for an instruction. 56437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// A ScheduleData either represents a single instruction or a member of an 56537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// instruction bundle (= a group of instructions which is combined into a 56637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// vector instruction). 56737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines struct ScheduleData { 56837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 56937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The initial value for the dependency counters. It means that the 57037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // dependencies are not calculated yet. 57137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines enum { InvalidDeps = -1 }; 57237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 57337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData() 57437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines : Inst(nullptr), FirstInBundle(nullptr), NextInBundle(nullptr), 57537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NextLoadStore(nullptr), SchedulingRegionID(0), SchedulingPriority(0), 57637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Dependencies(InvalidDeps), UnscheduledDeps(InvalidDeps), 57737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines UnscheduledDepsInBundle(InvalidDeps), IsScheduled(false) {} 57837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 57937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void init(int BlockSchedulingRegionID) { 58037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines FirstInBundle = this; 58137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NextInBundle = nullptr; 58237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NextLoadStore = nullptr; 58337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines IsScheduled = false; 58437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SchedulingRegionID = BlockSchedulingRegionID; 58537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines UnscheduledDepsInBundle = UnscheduledDeps; 58637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines clearDependencies(); 58737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 58837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 58937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Returns true if the dependency information has been calculated. 59037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool hasValidDependencies() const { return Dependencies != InvalidDeps; } 59137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 59237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Returns true for single instructions and for bundle representatives 59337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// (= the head of a bundle). 59437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool isSchedulingEntity() const { return FirstInBundle == this; } 59537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 59637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Returns true if it represents an instruction bundle and not only a 59737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// single instruction. 59837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool isPartOfBundle() const { 59937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return NextInBundle != nullptr || FirstInBundle != this; 60037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 60137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 60237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Returns true if it is ready for scheduling, i.e. it has no more 60337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// unscheduled depending instructions/bundles. 60437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool isReady() const { 60537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(isSchedulingEntity() && 60637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "can't consider non-scheduling entity for ready list"); 60737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return UnscheduledDepsInBundle == 0 && !IsScheduled; 60837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 60937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 61037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Modifies the number of unscheduled dependencies, also updating it for 61137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// the whole bundle. 61237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int incrementUnscheduledDeps(int Incr) { 61337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines UnscheduledDeps += Incr; 61437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return FirstInBundle->UnscheduledDepsInBundle += Incr; 61537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 61637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 61737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Sets the number of unscheduled dependencies to the number of 61837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// dependencies. 61937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void resetUnscheduledDeps() { 62037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines incrementUnscheduledDeps(Dependencies - UnscheduledDeps); 62137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 62237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 62337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Clears all dependency information. 62437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void clearDependencies() { 62537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Dependencies = InvalidDeps; 62637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines resetUnscheduledDeps(); 62737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MemoryDependencies.clear(); 62837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 62937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 63037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void dump(raw_ostream &os) const { 63137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!isSchedulingEntity()) { 63237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines os << "/ " << *Inst; 63337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else if (NextInBundle) { 63437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines os << '[' << *Inst; 63537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = NextInBundle; 63637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (SD) { 63737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines os << ';' << *SD->Inst; 63837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD = SD->NextInBundle; 63937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 64037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines os << ']'; 64137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 64237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines os << *Inst; 64337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 64437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 64553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 64637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *Inst; 64737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 64837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Points to the head in an instruction bundle (and always to this for 64937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// single instructions). 65037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *FirstInBundle; 65137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 65237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Single linked list of all instructions in a bundle. Null if it is a 65337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// single instruction. 65437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *NextInBundle; 65537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 65637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Single linked list of all memory instructions (e.g. load, store, call) 65737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// in the block - until the end of the scheduling region. 65837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *NextLoadStore; 65937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 66037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The dependent memory instructions. 66137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// This list is derived on demand in calculateDependencies(). 66237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallVector<ScheduleData *, 4> MemoryDependencies; 66337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 66437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// This ScheduleData is in the current scheduling region if this matches 66537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// the current SchedulingRegionID of BlockScheduling. 66637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int SchedulingRegionID; 66737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 66837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Used for getting a "good" final ordering of instructions. 66937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int SchedulingPriority; 67037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 67137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The number of dependencies. Constitutes of the number of users of the 67237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// instruction plus the number of dependent memory instructions (if any). 67337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// This value is calculated on demand. 67437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// If InvalidDeps, the number of dependencies is not calculated yet. 67537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// 67637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Dependencies; 67737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 67837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The number of dependencies minus the number of dependencies of scheduled 67937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// instructions. As soon as this is zero, the instruction/bundle gets ready 68037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// for scheduling. 68137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Note that this is negative as long as Dependencies is not calculated. 68237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int UnscheduledDeps; 68337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 68437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for 68537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// single instructions. 68637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int UnscheduledDepsInBundle; 68737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 68837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// True if this instruction is scheduled (or considered as scheduled in the 68937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// dry-run). 69037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool IsScheduled; 69137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines }; 69237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 69337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#ifndef NDEBUG 694de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar friend inline raw_ostream &operator<<(raw_ostream &os, 695de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const BoUpSLP::ScheduleData &SD) { 696de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SD.dump(os); 697de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return os; 698de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 69937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#endif 70037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 70137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Contains all scheduling data for a basic block. 70237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// 70337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines struct BlockScheduling { 70437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 70537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BlockScheduling(BasicBlock *BB) 70637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize), 70737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleStart(nullptr), ScheduleEnd(nullptr), 70837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr), 709f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar ScheduleRegionSize(0), 710f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar ScheduleRegionSizeLimit(ScheduleRegionSizeBudget), 71137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Make sure that the initial SchedulingRegionID is greater than the 71237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // initial SchedulingRegionID in ScheduleData (which is 0). 71337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SchedulingRegionID(1) {} 71437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 71537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void clear() { 71637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.clear(); 71737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleStart = nullptr; 71837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleEnd = nullptr; 71937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines FirstLoadStoreInRegion = nullptr; 72037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LastLoadStoreInRegion = nullptr; 72137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 722f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Reduce the maximum schedule region size by the size of the 723f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // previous scheduling run. 724f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar ScheduleRegionSizeLimit -= ScheduleRegionSize; 725f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (ScheduleRegionSizeLimit < MinScheduleRegionSize) 726f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar ScheduleRegionSizeLimit = MinScheduleRegionSize; 727f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar ScheduleRegionSize = 0; 728f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 72937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Make a new scheduling region, i.e. all existing ScheduleData is not 73037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // in the new region yet. 73137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++SchedulingRegionID; 73237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 73337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 73437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *getScheduleData(Value *V) { 73537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = ScheduleDataMap[V]; 73637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (SD && SD->SchedulingRegionID == SchedulingRegionID) 73737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return SD; 73837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return nullptr; 73937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 74037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 74137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool isInSchedulingRegion(ScheduleData *SD) { 74237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return SD->SchedulingRegionID == SchedulingRegionID; 74337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 74437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 74537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Marks an instruction as scheduled and puts all dependent ready 74637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// instructions into the ready-list. 74737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines template <typename ReadyListType> 74837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void schedule(ScheduleData *SD, ReadyListType &ReadyList) { 74937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->IsScheduled = true; 75037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: schedule " << *SD << "\n"); 75137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 75237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *BundleMember = SD; 75337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (BundleMember) { 75437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Handle the def-use chain dependencies. 75537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (Use &U : BundleMember->Inst->operands()) { 75637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *OpDef = getScheduleData(U.get()); 75737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (OpDef && OpDef->hasValidDependencies() && 75837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines OpDef->incrementUnscheduledDeps(-1) == 0) { 75937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // There are no more unscheduled dependencies after decrementing, 76037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // so we can put the dependent instruction into the ready list. 76137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *DepBundle = OpDef->FirstInBundle; 76237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!DepBundle->IsScheduled && 76337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "already scheduled bundle gets ready"); 76437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyList.insert(DepBundle); 76537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: gets ready (def): " << *DepBundle << "\n"); 76637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 76737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 76837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Handle the memory dependencies. 76937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) { 77037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (MemoryDepSD->incrementUnscheduledDeps(-1) == 0) { 77137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // There are no more unscheduled dependencies after decrementing, 77237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // so we can put the dependent instruction into the ready list. 77337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *DepBundle = MemoryDepSD->FirstInBundle; 77437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!DepBundle->IsScheduled && 77537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "already scheduled bundle gets ready"); 77637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyList.insert(DepBundle); 77737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: gets ready (mem): " << *DepBundle << "\n"); 77837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 77937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 78037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember = BundleMember->NextInBundle; 78137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 78237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 78337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 78437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Put all instructions into the ReadyList which are ready for scheduling. 78537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines template <typename ReadyListType> 78637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void initialFillReadyList(ReadyListType &ReadyList) { 78737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) { 78837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = getScheduleData(I); 78937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (SD->isSchedulingEntity() && SD->isReady()) { 79037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyList.insert(SD); 79137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: initially in ready list: " << *I << "\n"); 79237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 79337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 79437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 79537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 79637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Checks if a bundle of instructions can be scheduled, i.e. has no 79737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// cyclic dependencies. This is only a dry-run, no instructions are 79837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// actually moved at this stage. 799ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP); 80037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 80137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Un-bundles a group of instructions. 80237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void cancelScheduling(ArrayRef<Value *> VL); 80337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 80437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Extends the scheduling region so that V is inside the region. 805f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar /// \returns true if the region size is within the limit. 806f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool extendSchedulingRegion(Value *V); 80737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 80837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Initialize the ScheduleData structures for new instructions in the 80937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// scheduling region. 81037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void initScheduleData(Instruction *FromI, Instruction *ToI, 81137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *PrevLoadStore, 81237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *NextLoadStore); 81337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 81437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Updates the dependency information of a bundle and of all instructions/ 81537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// bundles which depend on the original bundle. 81637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void calculateDependencies(ScheduleData *SD, bool InsertInReadyList, 817ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BoUpSLP *SLP); 81837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 81937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Sets all instruction in the scheduling region to un-scheduled. 82037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void resetSchedule(); 82137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 82237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BasicBlock *BB; 82337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 82437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Simple memory allocation for ScheduleData. 82537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines std::vector<std::unique_ptr<ScheduleData[]>> ScheduleDataChunks; 82637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 82737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The size of a ScheduleData array in ScheduleDataChunks. 82837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int ChunkSize; 82937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 83037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The allocator position in the current chunk, which is the last entry 83137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// of ScheduleDataChunks. 83237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int ChunkPos; 83337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 83437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Attaches ScheduleData to Instruction. 83537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Note that the mapping survives during all vectorization iterations, i.e. 83637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// ScheduleData structures are recycled. 83737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DenseMap<Value *, ScheduleData *> ScheduleDataMap; 83837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 83937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines struct ReadyList : SmallVector<ScheduleData *, 8> { 84037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void insert(ScheduleData *SD) { push_back(SD); } 84137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines }; 84237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 84337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The ready-list for scheduling (only used for the dry-run). 84437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyList ReadyInsts; 84537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 84637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The first instruction of the scheduling region. 84737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *ScheduleStart; 84837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 84937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The first instruction _after_ the scheduling region. 85037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *ScheduleEnd; 85137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 85237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The first memory accessing instruction in the scheduling region 85337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// (can be null). 85437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *FirstLoadStoreInRegion; 85537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 85637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The last memory accessing instruction in the scheduling region 85737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// (can be null). 85837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *LastLoadStoreInRegion; 85937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 860f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar /// The current size of the scheduling region. 861f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar int ScheduleRegionSize; 862de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 863f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar /// The maximum size allowed for the scheduling region. 864f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar int ScheduleRegionSizeLimit; 865f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 86637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// The ID of the scheduling region. For a new vectorization iteration this 86737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// is incremented which "removes" all ScheduleData from the region. 86837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int SchedulingRegionID; 86937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines }; 87037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 87137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Attaches the BlockScheduling structures to basic blocks. 872ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines MapVector<BasicBlock *, std::unique_ptr<BlockScheduling>> BlocksSchedules; 87337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 87437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// Performs the "real" scheduling. Done before vectorization is actually 87537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines /// performed in a basic block. 87637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines void scheduleBlock(BlockScheduling *BS); 877dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 878dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// List of users to ignore during scheduling and that don't need extracting. 879dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ArrayRef<Value *> UserIgnoreList; 880a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 88137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Number of load-bundles, which contain consecutive loads. 88237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int NumLoadsWantToKeepOrder; 88337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 88437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Number of load-bundles of size 2, which are consecutive loads if reversed. 88537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int NumLoadsWantToChangeOrder; 88637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 88753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // Analysis and block reference. 88853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Function *F; 88953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem ScalarEvolution *SE; 89053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem TargetTransformInfo *TTI; 891dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines TargetLibraryInfo *TLI; 89253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem AliasAnalysis *AA; 89353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem LoopInfo *LI; 894722b0a4d293b16eebaed94ae65d5f11743cbcea5Nadav Rotem DominatorTree *DT; 895de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AssumptionCache *AC; 896de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DemandedBits *DB; 897de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const DataLayout *DL; 898de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt. 899de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned MinVecRegSize; // Set by cl::opt (default: 128). 90053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem /// Instruction builder to construct the vectorized tree. 90153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem IRBuilder<> Builder; 902de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 903de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// A map of scalar integer values to the smallest bit width with which they 904de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// can legally be represented. 905de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MapVector<Value *, uint64_t> MinBWs; 90653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem}; 90753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 908de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} // end namespace llvm 909de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} // end namespace slpvectorizer 91037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 911dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesvoid BoUpSLP::buildTree(ArrayRef<Value *> Roots, 912dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ArrayRef<Value *> UserIgnoreLst) { 913369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem deleteTree(); 914dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines UserIgnoreList = UserIgnoreLst; 91530bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem if (!getSameType(Roots)) 91630bbf070a2683fc95c105ad78f921ca59c56bb35Nadav Rotem return; 917369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Roots, 0); 918a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 919a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Collect the values that we need to extract from the tree. 920de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (TreeEntry &EIdx : VectorizableTree) { 921de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TreeEntry *Entry = &EIdx; 922a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 923a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // For each lane: 924a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { 925a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem Value *Scalar = Entry->Scalars[Lane]; 926a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 927a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // No need to handle users of gathered values. 928a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem if (Entry->NeedToGather) 929a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem continue; 930a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 93136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (User *U : Scalar->users()) { 93236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n"); 933a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 93436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Instruction *UserInst = dyn_cast<Instruction>(U); 935a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (!UserInst) 936a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer continue; 937a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 93837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Skip in-tree scalars that become vectors 93937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ScalarToTreeEntry.count(U)) { 94037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Idx = ScalarToTreeEntry[U]; 94137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TreeEntry *UseEntry = &VectorizableTree[Idx]; 94237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Value *UseScalar = UseEntry->Scalars[0]; 94337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Some in-tree scalars will remain as scalar in vectorized 94437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // instructions. If that is the case, the one in Lane 0 will 94537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // be used. 94637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (UseScalar != U || 94737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) { 94837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U 94937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines << ".\n"); 95037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!VectorizableTree[Idx].NeedToGather && "Bad state"); 95137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 95237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 95337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 95437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 955dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Ignore users in the user ignore list. 956dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) != 957dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines UserIgnoreList.end()) 958a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem continue; 959a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 96036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " << 961a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem Lane << " from " << *Scalar << ".\n"); 96236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ExternalUses.push_back(ExternalUser(Scalar, U, Lane)); 963a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 964a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 965a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 96653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem} 96753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 96853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 969369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemvoid BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { 970de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool SameTy = allConstant(VL) || getSameType(VL); (void)SameTy; 971c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines bool isAltShuffle = false; 972369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem assert(SameTy && "Invalid types!"); 97353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 974369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Depth == RecursionMaxDepth) { 975369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); 976369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 977369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 978369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 97953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 980369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Don't handle vectors. 981369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (VL[0]->getType()->isVectorTy()) { 982369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); 983369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 984369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 985369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 98653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 987369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 988369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (SI->getValueOperand()->getType()->isVectorTy()) { 989369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); 990369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 991369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 992369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 993c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned Opcode = getSameOpcode(VL); 994c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 995c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Check that this shuffle vector refers to the alternate 996c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // sequence of opcodes. 997c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Opcode == Instruction::ShuffleVector) { 998c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *I0 = dyn_cast<Instruction>(VL[0]); 999c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned Op = I0->getOpcode(); 1000c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Op != Instruction::ShuffleVector) 1001c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines isAltShuffle = true; 1002c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 100353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1004369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // If all of the operands are identical or constant we have a simple solution. 1005c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) { 1006369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); 1007369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1008369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1009369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 101053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1011369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // We now know that this is a vector of instructions of the same type from 1012369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // the same block. 1013369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 101437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Don't vectorize ephemeral values. 101537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned i = 0, e = VL.size(); i != e; ++i) { 101637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (EphValues.count(VL[i])) { 101737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << 101837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ") is ephemeral.\n"); 101937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines newTreeEntry(VL, false); 102037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 102137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 102237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 102337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1024369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check if this is a duplicate of another entry. 1025369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (ScalarToTreeEntry.count(VL[0])) { 1026369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int Idx = ScalarToTreeEntry[VL[0]]; 1027369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TreeEntry *E = &VectorizableTree[Idx]; 1028369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL.size(); i != e; ++i) { 1029369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n"); 1030369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (E->Scalars[i] != VL[i]) { 1031369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); 1032369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1033369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1034369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1035369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1036369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *VL[0] << ".\n"); 1037369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1038369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 103953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1040369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check that none of the instructions in the bundle are already in the tree. 1041369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL.size(); i != e; ++i) { 1042369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (ScalarToTreeEntry.count(VL[i])) { 1043369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << 1044369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ") is already in tree.\n"); 1045369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1046369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1047369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1048369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 104953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1050ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // If any of the scalars is marked as a value that needs to stay scalar then 1051ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // we need to gather the scalars. 1052369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL.size(); i != e; ++i) { 1053ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (MustGather.count(VL[i])) { 1054ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); 1055369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1056369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1057369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1058369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 105953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1060369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check that all of the users of the scalars that we want to vectorize are 1061369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // schedulable. 1062369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Instruction *VL0 = cast<Instruction>(VL[0]); 1063369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BasicBlock *BB = cast<Instruction>(VL0)->getParent(); 106453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 106537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!DT->isReachableFromEntry(BB)) { 106637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Don't go into unreachable blocks. They may contain instructions with 106737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // dependency cycles which confuse the final scheduling. 106837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); 106937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines newTreeEntry(VL, false); 107037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 107153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1072de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1073369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check that every instructions appears once in this bundle. 107453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem for (unsigned i = 0, e = VL.size(); i < e; ++i) 1075369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned j = i+1; j < e; ++j) 1076369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (VL[i] == VL[j]) { 1077369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); 1078369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1079369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1080369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 108153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 108237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines auto &BSRef = BlocksSchedules[BB]; 108337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!BSRef) { 108437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BSRef = llvm::make_unique<BlockScheduling>(BB); 108553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 108637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BlockScheduling &BS = *BSRef.get(); 108753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1088ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!BS.tryScheduleBundle(VL, this)) { 108937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); 1090f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar assert((!BS.getScheduleData(VL[0]) || 1091f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar !BS.getScheduleData(VL[0])->isPartOfBundle()) && 1092f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar "tryScheduleBundle should cancelScheduling on failure"); 109337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines newTreeEntry(VL, false); 109437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 109553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 109637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); 109753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1098369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem switch (Opcode) { 1099369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PHI: { 1100369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem PHINode *PH = dyn_cast<PHINode>(VL0); 11013c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer 11023c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer // Check for terminator values (e.g. invoke). 11033c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer for (unsigned j = 0; j < VL.size(); ++j) 11043c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { 110536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TerminatorInst *Term = dyn_cast<TerminatorInst>( 110636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i))); 11073c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer if (Term) { 11083c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n"); 110937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 11103c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer newTreeEntry(VL, false); 11113c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer return; 11123c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer } 11133c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer } 11143c940067424204ecffb48ddc269895d48442279aArnold Schwaighofer 1115369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1116369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); 1117369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1118369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { 1119369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Operands; 1120369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Prepare the operand vector. 1121de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *j : VL) 1122de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock( 112336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines PH->getIncomingBlock(i))); 1124369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1125369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Operands, Depth + 1); 1126369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1127369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1128369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1129de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::ExtractValue: 1130369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ExtractElement: { 1131de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool Reuse = canReuseExtract(VL, Opcode); 1132369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Reuse) { 1133369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Reusing extract sequence.\n"); 113437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 113537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1136369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1137369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, Reuse); 1138369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1139369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1140369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Load: { 1141f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Check that a vectorized load would load the same memory as a scalar 1142f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // load. 1143f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // For example we don't want vectorize loads that are smaller than 8 bit. 1144f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Even though we have a packed struct {<i2, i2, i2, i2>} LLVM treats 1145f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // loading/storing it as an i8 struct. If we vectorize loads/stores from 1146f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // such a struct we read/write packed bits disagreeing with the 1147f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // unvectorized version. 1148f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Type *ScalarTy = VL[0]->getType(); 1149f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1150de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (DL->getTypeSizeInBits(ScalarTy) != 1151de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DL->getTypeAllocSizeInBits(ScalarTy)) { 1152f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BS.cancelScheduling(VL); 1153f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar newTreeEntry(VL, false); 1154f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); 1155f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 1156f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 1157369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check if the loads are consecutive or of we need to swizzle them. 1158fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) { 1159fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer LoadInst *L = cast<LoadInst>(VL[i]); 116037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!L->isSimple()) { 116137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1162369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 116337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); 116437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 116537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 1166f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1167de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { 1168de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], *DL, *SE)) { 116937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumLoadsWantToChangeOrder; 117037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 117137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 117237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines newTreeEntry(VL, false); 117337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n"); 1174369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1175369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1176fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer } 117737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumLoadsWantToKeepOrder; 1178369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1179369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of loads.\n"); 1180369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1181369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1182369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ZExt: 1183369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SExt: 1184369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToUI: 1185369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToSI: 1186369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPExt: 1187369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PtrToInt: 1188369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::IntToPtr: 1189369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SIToFP: 1190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UIToFP: 1191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Trunc: 1192369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPTrunc: 1193369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::BitCast: { 1194369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *SrcTy = VL0->getOperand(0)->getType(); 1195369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0; i < VL.size(); ++i) { 1196369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType(); 1197ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (Ty != SrcTy || !isValidElementType(Ty)) { 119837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1199369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1200369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); 1201369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1202369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1203369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1204369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1205369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of casts.\n"); 120653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1207369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 1208369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Operands; 1209369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Prepare the operand vector. 1210de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *j : VL) 1211de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Operands.push_back(cast<Instruction>(j)->getOperand(i)); 121253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1213369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Operands, Depth+1); 1214369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 121553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem return; 1216369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1217369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ICmp: 1218369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FCmp: { 1219369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check that all of the compares have the same predicate. 12200c7f116bb6950ef819323d855415b2f2b0aad987Pirama Arumuga Nainar CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate(); 1221135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType(); 1222369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 1, e = VL.size(); i < e; ++i) { 1223369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem CmpInst *Cmp = cast<CmpInst>(VL[i]); 1224135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem if (Cmp->getPredicate() != P0 || 1225135e81efe3c1848a308c96dfd65e4d88b0d8667bNadav Rotem Cmp->getOperand(0)->getType() != ComparedTy) { 122637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1227369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1228369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); 1229369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1230369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1231369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 123253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1233369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1234369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of compares.\n"); 123553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1236369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 1237369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Operands; 1238369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Prepare the operand vector. 1239de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *j : VL) 1240de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Operands.push_back(cast<Instruction>(j)->getOperand(i)); 124153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1242369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Operands, Depth+1); 1243805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem } 1244369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 124553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1246369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Select: 1247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Add: 1248369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FAdd: 1249369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Sub: 1250369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FSub: 1251369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Mul: 1252369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FMul: 1253369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UDiv: 1254369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SDiv: 1255369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FDiv: 1256369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::URem: 1257369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SRem: 1258369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FRem: 1259369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Shl: 1260369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::LShr: 1261369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::AShr: 1262369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::And: 1263369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Or: 1264369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Xor: { 1265369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1266369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of bin op.\n"); 1267369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1268af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer // Sort operands of the instructions so that each side is more likely to 1269af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer // have the same opcode. 1270af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) { 1271af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer ValueList Left, Right; 1272af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer reorderInputsAccordingToOpcode(VL, Left, Right); 127337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines buildTree_rec(Left, Depth + 1); 127437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines buildTree_rec(Right, Depth + 1); 1275af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer return; 1276af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer } 1277af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer 1278369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 1279369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Operands; 1280369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Prepare the operand vector. 1281de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *j : VL) 1282de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Operands.push_back(cast<Instruction>(j)->getOperand(i)); 1283369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1284369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Operands, Depth+1); 1285369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1286369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 128753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1288c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::GetElementPtr: { 1289c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // We don't combine GEPs with complicated (nested) indexing. 1290c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned j = 0; j < VL.size(); ++j) { 1291c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (cast<Instruction>(VL[j])->getNumOperands() != 2) { 1292c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); 129337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1294c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, false); 1295c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1296c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1297c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1298c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1299c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // We can't combine several GEPs into one vector if they operate on 1300c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // different types. 1301c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType(); 1302c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned j = 0; j < VL.size(); ++j) { 1303c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType(); 1304c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Ty0 != CurTy) { 1305c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); 130637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1307c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, false); 1308c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1309c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1310c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1311c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1312c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // We don't combine GEPs with non-constant indexes. 1313c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned j = 0; j < VL.size(); ++j) { 1314c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines auto Op = cast<Instruction>(VL[j])->getOperand(1); 1315c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (!isa<ConstantInt>(Op)) { 1316c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG( 1317c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); 131837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1319c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, false); 1320c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1321c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1322c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1323c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1324c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, true); 1325c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: added a vector of GEPs.\n"); 1326c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned i = 0, e = 2; i < e; ++i) { 1327c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ValueList Operands; 1328c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Prepare the operand vector. 1329de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *j : VL) 1330de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Operands.push_back(cast<Instruction>(j)->getOperand(i)); 1331c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1332c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines buildTree_rec(Operands, Depth + 1); 1333c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1334c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1335c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1336369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Store: { 1337369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Check if the stores are consecutive or of we need to swizzle them. 1338369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) 1339de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { 134037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1341369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 134236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); 1343369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 1344369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1345805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 1346369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, true); 1347369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: added a vector of stores.\n"); 1348805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 1349805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem ValueList Operands; 1350de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *j : VL) 1351de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Operands.push_back(cast<Instruction>(j)->getOperand(0)); 1352805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 1353369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem buildTree_rec(Operands, Depth + 1); 135453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem return; 135553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 135636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case Instruction::Call: { 135736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Check if the calls are all to the same vectorizable intrinsic. 1358dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CallInst *CI = cast<CallInst>(VL[0]); 1359dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Check if this is an Intrinsic call or something that can be 1360dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // represented by an intrinsic call 1361de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); 1362dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!isTriviallyVectorizable(ID)) { 136337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 136436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines newTreeEntry(VL, false); 136536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); 136636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return; 136736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1368dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Function *Int = CI->getCalledFunction(); 1369c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *A1I = nullptr; 1370c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (hasVectorInstrinsicScalarOpd(ID, 1)) 1371c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines A1I = CI->getArgOperand(1); 137236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned i = 1, e = VL.size(); i != e; ++i) { 1373dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CallInst *CI2 = dyn_cast<CallInst>(VL[i]); 1374dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!CI2 || CI2->getCalledFunction() != Int || 1375de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getVectorIntrinsicIDForCall(CI2, TLI) != ID || 1376de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar !CI->hasIdenticalOperandBundleSchema(*CI2)) { 137737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 137836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines newTreeEntry(VL, false); 1379dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] 138036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines << "\n"); 138136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return; 138236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1383c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // ctlz,cttz and powi are special intrinsics whose second argument 1384c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // should be same in order for them to be vectorized. 1385c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (hasVectorInstrinsicScalarOpd(ID, 1)) { 1386c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *A1J = CI2->getArgOperand(1); 1387c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (A1I != A1J) { 138837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1389c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, false); 1390c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI 1391c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines << " argument "<< A1I<<"!=" << A1J 1392c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines << "\n"); 1393c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1394c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1395c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1396de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Verify that the bundle operands are identical between the two calls. 1397de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (CI->hasOperandBundles() && 1398de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar !std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(), 1399de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar CI->op_begin() + CI->getBundleOperandsEndIndex(), 1400de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar CI2->op_begin() + CI2->getBundleOperandsStartIndex())) { 1401de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BS.cancelScheduling(VL); 1402de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar newTreeEntry(VL, false); 1403de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!=" 1404de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar << *VL[i] << '\n'); 1405de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return; 1406de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 140736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 140836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 140936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines newTreeEntry(VL, true); 1410dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) { 141136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ValueList Operands; 141236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Prepare the operand vector. 1413de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *j : VL) { 1414de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar CallInst *CI2 = dyn_cast<CallInst>(j); 1415dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Operands.push_back(CI2->getArgOperand(i)); 141636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 141736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines buildTree_rec(Operands, Depth + 1); 141836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 141936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return; 142036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1421c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::ShuffleVector: { 1422c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // If this is not an alternate sequence of opcode like add-sub 1423c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // then do not vectorize this instruction. 1424c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (!isAltShuffle) { 142537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1426c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, false); 1427c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); 1428c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1429c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1430c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines newTreeEntry(VL, true); 1431c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); 1432ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1433ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Reorder operands if reordering would enable vectorization. 1434ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (isa<BinaryOperator>(VL0)) { 1435ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines ValueList Left, Right; 1436ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines reorderAltShuffleOperands(VL, Left, Right); 1437ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines buildTree_rec(Left, Depth + 1); 1438ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines buildTree_rec(Right, Depth + 1); 1439ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return; 1440ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1441ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1442c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 1443c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ValueList Operands; 1444c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Prepare the operand vector. 1445de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *j : VL) 1446de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Operands.push_back(cast<Instruction>(j)->getOperand(i)); 1447c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1448c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines buildTree_rec(Operands, Depth + 1); 1449c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1450c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return; 1451c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1452369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem default: 145337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS.cancelScheduling(VL); 1454369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem newTreeEntry(VL, false); 1455369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); 1456369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return; 145753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 145853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem} 145953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1460de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const { 1461de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned N; 1462de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Type *EltTy; 1463de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *ST = dyn_cast<StructType>(T); 1464de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (ST) { 1465de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar N = ST->getNumElements(); 1466de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar EltTy = *ST->element_begin(); 1467de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } else { 1468de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar N = cast<ArrayType>(T)->getNumElements(); 1469de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar EltTy = cast<ArrayType>(T)->getElementType(); 1470de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1471de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!isValidElementType(EltTy)) 1472de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 0; 1473de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar uint64_t VTSize = DL.getTypeStoreSizeInBits(VectorType::get(EltTy, N)); 1474de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T)) 1475de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 0; 1476de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (ST) { 1477de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Check that struct is homogeneous. 1478de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (const auto *Ty : ST->elements()) 1479de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Ty != EltTy) 1480de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 0; 1481de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1482de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return N; 1483de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 1484de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1485de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, unsigned Opcode) const { 1486de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(Opcode == Instruction::ExtractElement || 1487de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Opcode == Instruction::ExtractValue); 1488de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(Opcode == getSameOpcode(VL) && "Invalid opcode"); 1489de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Check if all of the extracts come from the same vector and from the 1490de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // correct offset. 1491de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Value *VL0 = VL[0]; 1492de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Instruction *E0 = cast<Instruction>(VL0); 1493de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Value *Vec = E0->getOperand(0); 1494de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1495de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We have to extract from a vector/aggregate with the same number of elements. 1496de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned NElts; 1497de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Opcode == Instruction::ExtractValue) { 1498de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const DataLayout &DL = E0->getModule()->getDataLayout(); 1499de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar NElts = canMapToVector(Vec->getType(), DL); 1500de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!NElts) 1501de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1502de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Check if load can be rewritten as load of vector. 1503de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LoadInst *LI = dyn_cast<LoadInst>(Vec); 1504de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!LI || !LI->isSimple() || !LI->hasNUses(VL.size())) 1505de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1506de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } else { 1507de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar NElts = Vec->getType()->getVectorNumElements(); 1508de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1509de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1510de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (NElts != VL.size()) 1511de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1512de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1513de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Check that all of the indices extract from the correct offset. 1514de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!matchExtractIndex(E0, 0, Opcode)) 1515de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1516de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1517de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned i = 1, e = VL.size(); i < e; ++i) { 1518de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Instruction *E = cast<Instruction>(VL[i]); 1519de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!matchExtractIndex(E, i, Opcode)) 1520de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1521de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (E->getOperand(0) != Vec) 1522de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1523de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1524de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1525de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return true; 1526de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 1527de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1528369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getEntryCost(TreeEntry *E) { 1529369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ArrayRef<Value*> VL = E->Scalars; 153053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 153153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Type *ScalarTy = VL[0]->getType(); 153253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 153353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem ScalarTy = SI->getValueOperand()->getType(); 153425961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); 153525961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem 1536de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If we have computed a smaller type for the expression, update VecTy so 1537de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // that the costs will be accurate. 1538de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MinBWs.count(VL[0])) 1539de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar VecTy = VectorType::get(IntegerType::get(F->getContext(), MinBWs[VL[0]]), 1540de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar VL.size()); 1541de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1542369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (E->NeedToGather) { 1543369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (allConstant(VL)) 1544369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return 0; 1545369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (isSplat(VL)) { 1546369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); 154753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1548369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return getGatherCost(E->Scalars); 154953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1550c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned Opcode = getSameOpcode(VL); 1551c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL"); 155253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Instruction *VL0 = cast<Instruction>(VL[0]); 155353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem switch (Opcode) { 1554369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PHI: { 155553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem return 0; 155653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1557de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::ExtractValue: 1558369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ExtractElement: { 1559de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (canReuseExtract(VL, Opcode)) { 156036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int DeadCost = 0; 156136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned i = 0, e = VL.size(); i < e; ++i) { 1562de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Instruction *E = cast<Instruction>(VL[i]); 156336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (E->hasOneUse()) 156436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Take credit for instruction that will become dead. 156536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DeadCost += 156636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i); 156736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 156836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return -DeadCost; 156936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1570369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return getGatherCost(VecTy); 157125961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem } 1572369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ZExt: 1573369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SExt: 1574369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToUI: 1575369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToSI: 1576369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPExt: 1577369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PtrToInt: 1578369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::IntToPtr: 1579369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SIToFP: 1580369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UIToFP: 1581369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Trunc: 1582369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPTrunc: 1583369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::BitCast: { 1584369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *SrcTy = VL0->getOperand(0)->getType(); 1585369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1586369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Calculate the cost of this instruction. 1587369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), 1588369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VL0->getType(), SrcTy); 1589369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1590369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size()); 1591369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy); 1592369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return VecCost - ScalarCost; 159353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1594369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FCmp: 1595369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ICmp: 1596de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::Select: { 1597de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Calculate the cost of this instruction. 1598de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); 1599de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int ScalarCost = VecTy->getNumElements() * 1600de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty()); 1601de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy); 1602de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return VecCost - ScalarCost; 1603de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1604369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Add: 1605369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FAdd: 1606369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Sub: 1607369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FSub: 1608369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Mul: 1609369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FMul: 1610369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UDiv: 1611369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SDiv: 1612369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FDiv: 1613369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::URem: 1614369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SRem: 1615369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FRem: 1616369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Shl: 1617369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::LShr: 1618369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::AShr: 1619369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::And: 1620369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Or: 1621369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Xor: { 1622de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Certain instructions can be cheaper to vectorize if they have a 1623de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // constant second vector operand. 1624de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetTransformInfo::OperandValueKind Op1VK = 1625de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetTransformInfo::OK_AnyValue; 1626de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetTransformInfo::OperandValueKind Op2VK = 1627de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetTransformInfo::OK_UniformConstantValue; 1628de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetTransformInfo::OperandValueProperties Op1VP = 1629de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetTransformInfo::OP_None; 1630de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetTransformInfo::OperandValueProperties Op2VP = 1631de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetTransformInfo::OP_None; 1632de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1633de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If all operands are exactly the same ConstantInt then set the 1634de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // operand kind to OK_UniformConstantValue. 1635de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If instead not all operands are constants, then set the operand kind 1636de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // to OK_AnyValue. If all operands are constants but not the same, 1637de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // then set the operand kind to OK_NonUniformConstantValue. 1638de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ConstantInt *CInt = nullptr; 1639de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned i = 0; i < VL.size(); ++i) { 1640de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const Instruction *I = cast<Instruction>(VL[i]); 1641de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!isa<ConstantInt>(I->getOperand(1))) { 1642de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Op2VK = TargetTransformInfo::OK_AnyValue; 1643de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar break; 164436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1645de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (i == 0) { 1646de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar CInt = cast<ConstantInt>(I->getOperand(1)); 1647de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 1648de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1649de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && 1650de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar CInt != cast<ConstantInt>(I->getOperand(1))) 1651de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Op2VK = TargetTransformInfo::OK_NonUniformConstantValue; 1652369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 1653de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // FIXME: Currently cost of model modification for division by power of 1654de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // 2 is handled for X86 and AArch64. Add support for other targets. 1655de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && CInt && 1656de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar CInt->getValue().isPowerOf2()) 1657de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Op2VP = TargetTransformInfo::OP_PowerOf2; 1658de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1659de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int ScalarCost = VecTy->getNumElements() * 1660de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, 1661de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Op2VK, Op1VP, Op2VP); 1662de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK, 1663de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Op1VP, Op2VP); 1664369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return VecCost - ScalarCost; 166553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1666c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::GetElementPtr: { 1667c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OperandValueKind Op1VK = 1668c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OK_AnyValue; 1669c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OperandValueKind Op2VK = 1670c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OK_UniformConstantValue; 1671c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1672c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines int ScalarCost = 1673c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines VecTy->getNumElements() * 1674c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK); 1675c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines int VecCost = 1676c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK); 1677c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1678c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return VecCost - ScalarCost; 1679c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1680369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Load: { 1681369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Cost of wide load - cost of scalar loads. 1682de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned alignment = dyn_cast<LoadInst>(VL0)->getAlignment(); 1683369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int ScalarLdCost = VecTy->getNumElements() * 1684de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0); 1685de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, 1686de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar VecTy, alignment, 0); 1687369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return VecLdCost - ScalarLdCost; 168853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1689369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Store: { 1690369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // We know that we can merge the stores. Calculate the cost. 1691de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned alignment = dyn_cast<StoreInst>(VL0)->getAlignment(); 1692369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int ScalarStCost = VecTy->getNumElements() * 1693de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0); 1694de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int VecStCost = TTI->getMemoryOpCost(Instruction::Store, 1695de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar VecTy, alignment, 0); 1696369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return VecStCost - ScalarStCost; 169725961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem } 169836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case Instruction::Call: { 169936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CallInst *CI = cast<CallInst>(VL0); 1700de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); 170136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 170236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Calculate the cost of the scalar and vector calls. 170336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SmallVector<Type*, 4> ScalarTys, VecTys; 1704dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) { 170536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ScalarTys.push_back(CI->getArgOperand(op)->getType()); 170636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(), 170736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines VecTy->getNumElements())); 170836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 170936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1710de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar FastMathFlags FMF; 1711de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (auto *FPMO = dyn_cast<FPMathOperator>(CI)) 1712de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar FMF = FPMO->getFastMathFlags(); 1713de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 171436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int ScalarCallCost = VecTy->getNumElements() * 1715de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF); 171636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1717de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys, FMF); 171836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 171936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost 172036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines << " (" << VecCallCost << "-" << ScalarCallCost << ")" 1721dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines << " for " << *CI << "\n"); 172236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 172336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return VecCallCost - ScalarCallCost; 172436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 1725c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::ShuffleVector: { 1726c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OperandValueKind Op1VK = 1727c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OK_AnyValue; 1728c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OperandValueKind Op2VK = 1729c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TargetTransformInfo::OK_AnyValue; 1730c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines int ScalarCost = 0; 1731c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines int VecCost = 0; 1732de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *i : VL) { 1733de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Instruction *I = cast<Instruction>(i); 1734c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (!I) 1735c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines break; 1736c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ScalarCost += 1737c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK); 1738c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1739c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // VecCost is equal to sum of the cost of creating 2 vectors 1740c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // and the cost of creating shuffle. 1741c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *I0 = cast<Instruction>(VL[0]); 1742c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines VecCost = 1743c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK); 1744c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *I1 = cast<Instruction>(VL[1]); 1745c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines VecCost += 1746c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK); 1747c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines VecCost += 1748c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0); 1749c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return VecCost - ScalarCost; 1750c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1751369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem default: 1752369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem llvm_unreachable("Unknown instruction"); 175353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1754369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 175525961b469a9debe69b915bcb4fa49d35d2ee9544Nadav Rotem 1756d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiangbool BoUpSLP::isFullyVectorizableTinyTree() { 1757d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang DEBUG(dbgs() << "SLP: Check whether the tree with height " << 1758d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang VectorizableTree.size() << " is fully vectorizable .\n"); 1759d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang 1760d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang // We only handle trees of height 2. 1761d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang if (VectorizableTree.size() != 2) 1762d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang return false; 1763d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang 1764f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Handle splat and all-constants stores. 1765f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!VectorizableTree[0].NeedToGather && 1766f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar (allConstant(VectorizableTree[1].Scalars) || 1767f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar isSplat(VectorizableTree[1].Scalars))) 176836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 176936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1770d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang // Gathering cost would be too much for tiny trees. 177136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather) 177236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 1773d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang 177436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 1775d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang} 1776d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang 177737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesint BoUpSLP::getSpillCost() { 177837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Walk from the bottom of the tree to the top, tracking which values are 177937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // live. When we see a call instruction that is not part of our tree, 178037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // query TTI to see if there is a cost to keeping values live over it 178137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // (for example, if spills and fills are required). 178237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned BundleWidth = VectorizableTree.front().Scalars.size(); 178337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Cost = 0; 178437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 178537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallPtrSet<Instruction*, 4> LiveValues; 1786de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Instruction *PrevInst = nullptr; 178737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1788de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (const auto &N : VectorizableTree) { 1789de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Instruction *Inst = dyn_cast<Instruction>(N.Scalars[0]); 179037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!Inst) 179137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 179237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 179337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!PrevInst) { 179437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PrevInst = Inst; 179537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 179637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 179737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1798de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Update LiveValues. 1799de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LiveValues.erase(PrevInst); 1800de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto &J : PrevInst->operands()) { 1801de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J)) 1802de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LiveValues.insert(cast<Instruction>(&*J)); 1803de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1804de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 180537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG( 180637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines dbgs() << "SLP: #LV: " << LiveValues.size(); 180737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto *X : LiveValues) 180837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines dbgs() << " " << X->getName(); 180937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines dbgs() << ", Looking at "; 181037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Inst->dump(); 181137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ); 181237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 181337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Now find the sequence of instructions between PrevInst and Inst. 1814f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BasicBlock::reverse_iterator InstIt(Inst->getIterator()), 1815f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar PrevInstIt(PrevInst->getIterator()); 181637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines --PrevInstIt; 181737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (InstIt != PrevInstIt) { 181837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (PrevInstIt == PrevInst->getParent()->rend()) { 181937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PrevInstIt = Inst->getParent()->rbegin(); 182037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 182137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 182237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 182337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isa<CallInst>(&*PrevInstIt) && &*PrevInstIt != PrevInst) { 182437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallVector<Type*, 4> V; 182537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto *II : LiveValues) 182637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines V.push_back(VectorType::get(II->getType(), BundleWidth)); 182737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Cost += TTI->getCostOfKeepingLiveOverCall(V); 182837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 182937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 183037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++PrevInstIt; 183137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 183237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 183337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PrevInst = Inst; 183437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 183537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 183637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return Cost; 183737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 183837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1839369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getTreeCost() { 1840369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int Cost = 0; 1841369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Calculating cost for tree of size " << 1842369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorizableTree.size() << ".\n"); 1843369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 1844d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang // We only vectorize tiny trees if it is fully vectorizable. 1845d0132a783341696eba8ac97b83ae3388d95b4563Yi Jiang if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) { 1846ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (VectorizableTree.empty()) { 184767a38a2875f05ea9c219ab73c4398ee675eb4292Nadav Rotem assert(!ExternalUses.size() && "We should not have any external users"); 184867a38a2875f05ea9c219ab73c4398ee675eb4292Nadav Rotem } 1849085e23841e9c4f4682385fce456704a5f75f9cdcYi Jiang return INT_MAX; 1850a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 1851a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 1852a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem unsigned BundleWidth = VectorizableTree[0].Scalars.size(); 1853a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 1854de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (TreeEntry &TE : VectorizableTree) { 1855de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int C = getEntryCost(&TE); 1856369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with " 1857de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar << *TE.Scalars[0] << ".\n"); 1858369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Cost += C; 185953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 1860a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 186136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SmallSet<Value *, 16> ExtractCostCalculated; 1862a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem int ExtractCost = 0; 1863de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (ExternalUser &EU : ExternalUses) { 186436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // We only add extract cost once for the same scalar. 1865de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!ExtractCostCalculated.insert(EU.Scalar).second) 186637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 186737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 186837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Uses by ephemeral values are free (because the ephemeral value will be 186937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // removed prior to code generation, and so the extraction will be 187037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // removed as well). 1871de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (EphValues.count(EU.User)) 187236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 1873a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 1874de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If we plan to rewrite the tree in a smaller type, we will need to sign 1875de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // extend the extracted value back to the original type. Here, we account 1876de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // for the extract and the added cost of the sign extend if needed. 1877de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth); 1878de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *ScalarRoot = VectorizableTree[0].Scalars[0]; 1879de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MinBWs.count(ScalarRoot)) { 1880de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]); 1881de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar VecTy = VectorType::get(MinTy, BundleWidth); 1882de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ExtractCost += TTI->getExtractWithExtendCost( 1883de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Instruction::SExt, EU.Scalar->getType(), VecTy, EU.Lane); 1884de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } else { 1885de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ExtractCost += 1886de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, EU.Lane); 1887de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1888a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 1889a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 1890de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int SpillCost = getSpillCost(); 1891de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Cost += SpillCost + ExtractCost; 189237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1893de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "SLP: Spill Cost = " << SpillCost << ".\n" 1894de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar << "SLP: Extract Cost = " << ExtractCost << ".\n" 1895de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar << "SLP: Total Cost = " << Cost << ".\n"); 1896de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return Cost; 1897369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 189853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1899369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getGatherCost(Type *Ty) { 1900369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int Cost = 0; 1901369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i) 1902369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); 1903369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Cost; 1904369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 190553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1906369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemint BoUpSLP::getGatherCost(ArrayRef<Value *> VL) { 1907369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Find the type of the operands in VL. 1908369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *ScalarTy = VL[0]->getType(); 1909369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 1910369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ScalarTy = SI->getValueOperand()->getType(); 1911369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); 1912369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Find the cost of inserting/extracting values from the vector. 1913369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return getGatherCost(VecTy); 191453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem} 191553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 1916ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Reorder commutative operations in alternate shuffle if the resulting vectors 1917ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// are consecutive loads. This would allow us to vectorize the tree. 1918ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// If we have something like- 1919ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load a[0] - load b[0] 1920ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load b[1] + load a[1] 1921ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load a[2] - load b[2] 1922ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// load a[3] + load b[3] 1923ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Reordering the second load b[1] load a[1] would allow us to vectorize this 1924ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// code. 1925ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesvoid BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL, 1926ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Left, 1927ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Right) { 1928ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Push left and right operands of binary operation into Left and Right 1929de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *i : VL) { 1930de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Left.push_back(cast<Instruction>(i)->getOperand(0)); 1931de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Right.push_back(cast<Instruction>(i)->getOperand(1)); 1932ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1933ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1934ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Reorder if we have a commutative operation and consecutive access 1935ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // are on either side of the alternate instructions. 1936ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines for (unsigned j = 0; j < VL.size() - 1; ++j) { 1937ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) { 1938ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) { 1939ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Instruction *VL1 = cast<Instruction>(VL[j]); 1940ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Instruction *VL2 = cast<Instruction>(VL[j + 1]); 1941de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) { 1942ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines std::swap(Left[j], Right[j]); 1943ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines continue; 1944de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } else if (VL2->isCommutative() && 1945de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar isConsecutiveAccess(L, L1, *DL, *SE)) { 1946ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines std::swap(Left[j + 1], Right[j + 1]); 1947ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines continue; 1948ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1949ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // else unchanged 1950ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1951ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1952ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) { 1953ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) { 1954ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Instruction *VL1 = cast<Instruction>(VL[j]); 1955ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Instruction *VL2 = cast<Instruction>(VL[j + 1]); 1956de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) { 1957ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines std::swap(Left[j], Right[j]); 1958ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines continue; 1959de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } else if (VL2->isCommutative() && 1960de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar isConsecutiveAccess(L, L1, *DL, *SE)) { 1961ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines std::swap(Left[j + 1], Right[j + 1]); 1962ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines continue; 1963ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1964ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // else unchanged 1965ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1966ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1967ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1968ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 1969ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1970f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// Return true if I should be commuted before adding it's left and right 1971f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// operands to the arrays Left and Right. 1972f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// 1973f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// The vectorizer is trying to either have all elements one side being 1974f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// instruction with the same opcode to enable further vectorization, or having 1975f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// a splat to lower the vectorizing cost. 1976f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic bool shouldReorderOperands(int i, Instruction &I, 1977f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SmallVectorImpl<Value *> &Left, 1978f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SmallVectorImpl<Value *> &Right, 1979f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool AllSameOpcodeLeft, 1980f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool AllSameOpcodeRight, bool SplatLeft, 1981f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool SplatRight) { 1982f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Value *VLeft = I.getOperand(0); 1983f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Value *VRight = I.getOperand(1); 1984f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // If we have "SplatRight", try to see if commuting is needed to preserve it. 1985f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (SplatRight) { 1986f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (VRight == Right[i - 1]) 1987f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Preserve SplatRight 1988f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 1989f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (VLeft == Right[i - 1]) { 1990f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Commuting would preserve SplatRight, but we don't want to break 1991f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // SplatLeft either, i.e. preserve the original order if possible. 1992f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // (FIXME: why do we care?) 1993f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (SplatLeft && VLeft == Left[i - 1]) 1994f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 1995f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return true; 1996f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 1997f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 1998f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Symmetrically handle Right side. 1999f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (SplatLeft) { 2000f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (VLeft == Left[i - 1]) 2001f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Preserve SplatLeft 2002f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 2003f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (VRight == Left[i - 1]) 2004f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return true; 2005f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2006f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2007f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Instruction *ILeft = dyn_cast<Instruction>(VLeft); 2008f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Instruction *IRight = dyn_cast<Instruction>(VRight); 2009f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2010f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // If we have "AllSameOpcodeRight", try to see if the left operands preserves 2011f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // it and not the right, in this case we want to commute. 2012f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (AllSameOpcodeRight) { 2013f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned RightPrevOpcode = cast<Instruction>(Right[i - 1])->getOpcode(); 2014f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (IRight && RightPrevOpcode == IRight->getOpcode()) 2015f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Do not commute, a match on the right preserves AllSameOpcodeRight 2016f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 2017f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (ILeft && RightPrevOpcode == ILeft->getOpcode()) { 2018f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // We have a match and may want to commute, but first check if there is 2019f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // not also a match on the existing operands on the Left to preserve 2020f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // AllSameOpcodeLeft, i.e. preserve the original order if possible. 2021f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // (FIXME: why do we care?) 2022f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (AllSameOpcodeLeft && ILeft && 2023f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar cast<Instruction>(Left[i - 1])->getOpcode() == ILeft->getOpcode()) 2024f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 2025f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return true; 2026f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2027f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2028f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Symmetrically handle Left side. 2029f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (AllSameOpcodeLeft) { 2030f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned LeftPrevOpcode = cast<Instruction>(Left[i - 1])->getOpcode(); 2031f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (ILeft && LeftPrevOpcode == ILeft->getOpcode()) 2032f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 2033f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (IRight && LeftPrevOpcode == IRight->getOpcode()) 2034f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return true; 2035f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2036f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 2037f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 2038f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2039ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesvoid BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL, 2040ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Left, 2041ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SmallVectorImpl<Value *> &Right) { 2042ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 2043f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (VL.size()) { 2044f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Peel the first iteration out of the loop since there's nothing 2045f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // interesting to do anyway and it simplifies the checks in the loop. 2046f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar auto VLeft = cast<Instruction>(VL[0])->getOperand(0); 2047f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar auto VRight = cast<Instruction>(VL[0])->getOperand(1); 2048f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft)) 2049f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Favor having instruction to the right. FIXME: why? 2050f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar std::swap(VLeft, VRight); 2051ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Left.push_back(VLeft); 2052ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Right.push_back(VRight); 2053ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2054ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 2055f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Keep track if we have instructions with all the same opcode on one side. 2056f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool AllSameOpcodeLeft = isa<Instruction>(Left[0]); 2057f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool AllSameOpcodeRight = isa<Instruction>(Right[0]); 2058f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Keep track if we have one side with all the same value (broadcast). 2059f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool SplatLeft = true; 2060f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool SplatRight = true; 2061ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 2062f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (unsigned i = 1, e = VL.size(); i != e; ++i) { 2063f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Instruction *I = cast<Instruction>(VL[i]); 2064f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar assert(I->isCommutative() && "Can only process commutative instruction"); 2065f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Commute to favor either a splat or maximizing having the same opcodes on 2066f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // one side. 2067f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (shouldReorderOperands(i, *I, Left, Right, AllSameOpcodeLeft, 2068f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AllSameOpcodeRight, SplatLeft, SplatRight)) { 2069f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Left.push_back(I->getOperand(1)); 2070f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Right.push_back(I->getOperand(0)); 2071f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } else { 2072f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Left.push_back(I->getOperand(0)); 2073f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Right.push_back(I->getOperand(1)); 2074f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2075f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Update Splat* and AllSameOpcode* after the insertion. 2076f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SplatRight = SplatRight && (Right[i - 1] == Right[i]); 2077f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SplatLeft = SplatLeft && (Left[i - 1] == Left[i]); 2078f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AllSameOpcodeLeft = AllSameOpcodeLeft && isa<Instruction>(Left[i]) && 2079f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar (cast<Instruction>(Left[i - 1])->getOpcode() == 2080f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar cast<Instruction>(Left[i])->getOpcode()); 2081f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AllSameOpcodeRight = AllSameOpcodeRight && isa<Instruction>(Right[i]) && 2082f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar (cast<Instruction>(Right[i - 1])->getOpcode() == 2083f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar cast<Instruction>(Right[i])->getOpcode()); 2084ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2085ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 2086f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // If one operand end up being broadcast, return this operand order. 2087f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (SplatRight || SplatLeft) 2088f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 2089f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2090ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Finally check if we can get longer vectorizable chain by reordering 2091ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // without breaking the good operand order detected above. 2092ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // E.g. If we have something like- 2093ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // load a[0] load b[0] 2094ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // load b[1] load a[1] 2095ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // load a[2] load b[2] 2096ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // load a[3] load b[3] 2097ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Reordering the second load b[1] load a[1] would allow us to vectorize 2098ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // this code and we still retain AllSameOpcode property. 2099ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // FIXME: This load reordering might break AllSameOpcode in some rare cases 2100ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // such as- 2101ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // add a[0],c[0] load b[0] 2102ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // add a[1],c[2] load b[1] 2103ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // b[2] load b[2] 2104ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // add a[3],c[3] load b[3] 2105ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines for (unsigned j = 0; j < VL.size() - 1; ++j) { 2106ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) { 2107ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) { 2108de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isConsecutiveAccess(L, L1, *DL, *SE)) { 2109ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines std::swap(Left[j + 1], Right[j + 1]); 2110ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines continue; 2111ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2112ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2113ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2114ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) { 2115ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) { 2116de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isConsecutiveAccess(L, L1, *DL, *SE)) { 2117ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines std::swap(Left[j + 1], Right[j + 1]); 2118ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines continue; 2119ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2120ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2121ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2122ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // else unchanged 2123ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2124ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 2125ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 21264b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenaultvoid BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) { 21274b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault Instruction *VL0 = cast<Instruction>(VL[0]); 2128f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BasicBlock::iterator NextInst(VL0); 21294b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault ++NextInst; 21304b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault Builder.SetInsertPoint(VL0->getParent(), NextInst); 21314b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); 21324b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault} 21334b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault 2134369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) { 213553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Value *Vec = UndefValue::get(Ty); 213653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // Generate the 'InsertElement' instruction. 213753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem for (unsigned i = 0; i < Ty->getNumElements(); ++i) { 213853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); 2139a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) { 2140a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem GatherSeq.insert(Insrt); 2141a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling CSEBlocks.insert(Insrt->getParent()); 2142a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2143a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Add to our 'need-to-extract' list. 2144a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem if (ScalarToTreeEntry.count(VL[i])) { 2145a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem int Idx = ScalarToTreeEntry[VL[i]]; 2146a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem TreeEntry *E = &VectorizableTree[Idx]; 2147a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Find which lane we need to extract. 2148a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem int FoundLane = -1; 2149a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem for (unsigned Lane = 0, LE = VL.size(); Lane != LE; ++Lane) { 2150a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Is this the lane of the scalar that we are looking for ? 2151a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem if (E->Scalars[Lane] == VL[i]) { 2152a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem FoundLane = Lane; 2153a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem break; 2154a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 2155a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 2156a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem assert(FoundLane >= 0 && "Could not find the correct lane"); 2157a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem ExternalUses.push_back(ExternalUser(VL[i], Insrt, FoundLane)); 2158a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 2159a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 216053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 216153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 216253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem return Vec; 216353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem} 216453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 21656a804acc4ae77c014e4ef97c37f8e720ef360394Matt ArsenaultValue *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const { 21666a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault SmallDenseMap<Value*, int>::const_iterator Entry 21676a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault = ScalarToTreeEntry.find(VL[0]); 21686a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault if (Entry != ScalarToTreeEntry.end()) { 21696a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault int Idx = Entry->second; 21706a804acc4ae77c014e4ef97c37f8e720ef360394Matt Arsenault const TreeEntry *En = &VectorizableTree[Idx]; 217162657090de3a5731bf644437701ccd78c247119fNadav Rotem if (En->isSame(VL) && En->VectorizedValue) 217262657090de3a5731bf644437701ccd78c247119fNadav Rotem return En->VectorizedValue; 217362657090de3a5731bf644437701ccd78c247119fNadav Rotem } 2174dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 217562657090de3a5731bf644437701ccd78c247119fNadav Rotem} 217662657090de3a5731bf644437701ccd78c247119fNadav Rotem 2177369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) { 2178369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (ScalarToTreeEntry.count(VL[0])) { 2179369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int Idx = ScalarToTreeEntry[VL[0]]; 2180369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem TreeEntry *E = &VectorizableTree[Idx]; 2181369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (E->isSame(VL)) 2182369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return vectorizeTree(E); 2183369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 218453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 218553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Type *ScalarTy = VL[0]->getType(); 218653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 218753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem ScalarTy = SI->getValueOperand()->getType(); 218853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); 218953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2190369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Gather(VL, VecTy); 2191369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 2192369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 2193369cc938d261de3295eb70d0738f54ef1a82806cNadav RotemValue *BoUpSLP::vectorizeTree(TreeEntry *E) { 2194adb412daa41aef94a9f724dfd1ade9f579bb3a84Benjamin Kramer IRBuilder<>::InsertPointGuard Guard(Builder); 219553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2196369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (E->VectorizedValue) { 2197369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n"); 2198369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return E->VectorizedValue; 219953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 220053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 22011b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault Instruction *VL0 = cast<Instruction>(E->Scalars[0]); 22021b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault Type *ScalarTy = VL0->getType(); 22031b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault if (StoreInst *SI = dyn_cast<StoreInst>(VL0)) 2204369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ScalarTy = SI->getValueOperand()->getType(); 2205369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size()); 220653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2207369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (E->NeedToGather) { 22084b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 2209369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Gather(E->Scalars, VecTy); 2210369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 221137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2212c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned Opcode = getSameOpcode(E->Scalars); 2213805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 2214369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem switch (Opcode) { 2215369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PHI: { 2216369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem PHINode *PH = dyn_cast<PHINode>(VL0); 2217d237e834a816399b7e1561dd4db2c501f5095712Justin Bogner Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI()); 221879c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem Builder.SetCurrentDebugLocation(PH->getDebugLoc()); 2219369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues()); 2220369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = NewPhi; 2221369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 2222353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem // PHINodes may have multiple entries from the same block. We want to 2223353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem // visit every block once. 2224353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem SmallSet<BasicBlock*, 4> VisitedBBs; 2225353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem 2226369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { 2227369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList Operands; 2228369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BasicBlock *IBB = PH->getIncomingBlock(i); 2229369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 223037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!VisitedBBs.insert(IBB).second) { 2231353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB); 2232353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem continue; 2233353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem } 2234353476cbbb80881a754a90abc13851a884a4e26cNadav Rotem 2235369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Prepare the operand vector. 2236f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Value *V : E->Scalars) 2237f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(IBB)); 2238369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 2239369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Builder.SetInsertPoint(IBB->getTerminator()); 224079c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem Builder.SetCurrentDebugLocation(PH->getDebugLoc()); 2241369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *Vec = vectorizeTree(Operands); 2242369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem NewPhi->addIncoming(Vec, IBB); 2243369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2244805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 2245369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() && 2246369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem "Invalid number of incoming values"); 2247369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return NewPhi; 2248805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem } 2249805e8a01fe3b527edbad914636d55fd61d81e51cNadav Rotem 2250369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ExtractElement: { 2251de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (canReuseExtract(E->Scalars, Instruction::ExtractElement)) { 2252369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *V = VL0->getOperand(0); 2253369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = V; 2254369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return V; 2255369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2256369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Gather(E->Scalars, VecTy); 225753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2258de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::ExtractValue: { 2259de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (canReuseExtract(E->Scalars, Instruction::ExtractValue)) { 2260de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LoadInst *LI = cast<LoadInst>(VL0->getOperand(0)); 2261de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Builder.SetInsertPoint(LI); 2262de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace()); 2263de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy); 2264de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LoadInst *V = Builder.CreateAlignedLoad(Ptr, LI->getAlignment()); 2265de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar E->VectorizedValue = V; 2266de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return propagateMetadata(V, E->Scalars); 2267de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2268de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return Gather(E->Scalars, VecTy); 2269de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2270369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ZExt: 2271369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SExt: 2272369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToUI: 2273369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPToSI: 2274369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPExt: 2275369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::PtrToInt: 2276369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::IntToPtr: 2277369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SIToFP: 2278369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UIToFP: 2279369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Trunc: 2280369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FPTrunc: 2281369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::BitCast: { 2282369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList INVL; 2283f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Value *V : E->Scalars) 2284f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar INVL.push_back(cast<Instruction>(V)->getOperand(0)); 2285369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 22864b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 228779c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2288369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *InVec = vectorizeTree(INVL); 228962657090de3a5731bf644437701ccd78c247119fNadav Rotem 229062657090de3a5731bf644437701ccd78c247119fNadav Rotem if (Value *V = alreadyVectorized(E->Scalars)) 229162657090de3a5731bf644437701ccd78c247119fNadav Rotem return V; 229262657090de3a5731bf644437701ccd78c247119fNadav Rotem 2293369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem CastInst *CI = dyn_cast<CastInst>(VL0); 2294369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); 2295369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = V; 229637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2297369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return V; 229853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2299369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FCmp: 2300369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::ICmp: { 2301369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList LHSV, RHSV; 2302f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Value *V : E->Scalars) { 2303f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar LHSV.push_back(cast<Instruction>(V)->getOperand(0)); 2304f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar RHSV.push_back(cast<Instruction>(V)->getOperand(1)); 2305369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 230653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 23074b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 230879c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2309369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *L = vectorizeTree(LHSV); 2310369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *R = vectorizeTree(RHSV); 231162657090de3a5731bf644437701ccd78c247119fNadav Rotem 231262657090de3a5731bf644437701ccd78c247119fNadav Rotem if (Value *V = alreadyVectorized(E->Scalars)) 231362657090de3a5731bf644437701ccd78c247119fNadav Rotem return V; 231453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 23150c7f116bb6950ef819323d855415b2f2b0aad987Pirama Arumuga Nainar CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate(); 231662657090de3a5731bf644437701ccd78c247119fNadav Rotem Value *V; 2317369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Opcode == Instruction::FCmp) 2318369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem V = Builder.CreateFCmp(P0, L, R); 2319369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem else 2320369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem V = Builder.CreateICmp(P0, L, R); 232153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2322369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = V; 232337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2324369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return V; 232553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2326369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Select: { 2327369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList TrueVec, FalseVec, CondVec; 2328f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Value *V : E->Scalars) { 2329f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar CondVec.push_back(cast<Instruction>(V)->getOperand(0)); 2330f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar TrueVec.push_back(cast<Instruction>(V)->getOperand(1)); 2331f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar FalseVec.push_back(cast<Instruction>(V)->getOperand(2)); 2332369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 233353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 23344b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 233579c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2336369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *Cond = vectorizeTree(CondVec); 2337369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *True = vectorizeTree(TrueVec); 2338369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *False = vectorizeTree(FalseVec); 233962657090de3a5731bf644437701ccd78c247119fNadav Rotem 234062657090de3a5731bf644437701ccd78c247119fNadav Rotem if (Value *V = alreadyVectorized(E->Scalars)) 234162657090de3a5731bf644437701ccd78c247119fNadav Rotem return V; 234257aa3aad33b50583d5a82735777d0f0dc03ff122Matt Arsenault 2343369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *V = Builder.CreateSelect(Cond, True, False); 2344369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = V; 234537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2346369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return V; 234753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2348369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Add: 2349369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FAdd: 2350369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Sub: 2351369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FSub: 2352369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Mul: 2353369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FMul: 2354369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::UDiv: 2355369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SDiv: 2356369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FDiv: 2357369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::URem: 2358369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::SRem: 2359369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::FRem: 2360369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Shl: 2361369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::LShr: 2362369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::AShr: 2363369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::And: 2364369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Or: 2365369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Xor: { 2366369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList LHSVL, RHSVL; 2367af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) 2368af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL); 2369af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer else 2370f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Value *V : E->Scalars) { 2371f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar LHSVL.push_back(cast<Instruction>(V)->getOperand(0)); 2372f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar RHSVL.push_back(cast<Instruction>(V)->getOperand(1)); 2373af57bdf7d673a3731fb887218e7a9ccd1576ab4fArnold Schwaighofer } 237453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 23754b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 237679c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2377369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *LHS = vectorizeTree(LHSVL); 2378369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *RHS = vectorizeTree(RHSVL); 237953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2380369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (LHS == RHS && isa<Instruction>(LHS)) { 2381369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem assert((VL0->getOperand(0) == VL0->getOperand(1)) && "Invalid order"); 2382369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 238353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 238462657090de3a5731bf644437701ccd78c247119fNadav Rotem if (Value *V = alreadyVectorized(E->Scalars)) 238562657090de3a5731bf644437701ccd78c247119fNadav Rotem return V; 238662657090de3a5731bf644437701ccd78c247119fNadav Rotem 2387369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BinaryOperator *BinOp = cast<BinaryOperator>(VL0); 2388369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS); 2389369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = V; 239037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines propagateIRFlags(E->VectorizedValue, E->Scalars); 239137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2392fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling 2393fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling if (Instruction *I = dyn_cast<Instruction>(V)) 2394fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling return propagateMetadata(I, E->Scalars); 2395fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling 2396369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return V; 2397369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2398369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Load: { 2399369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Loads are inserted at the head of the tree because we don't want to 2400369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // sink them all the way down past store instructions. 24014b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 240279c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2403369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem LoadInst *LI = cast<LoadInst>(VL0); 240437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Type *ScalarLoadTy = LI->getType(); 24059e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault unsigned AS = LI->getPointerAddressSpace(); 24069e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault 24079e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), 24089e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault VecTy->getPointerTo(AS)); 240937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 241037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The pointer operand uses an in-tree scalar so we add the new BitCast to 241137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // ExternalUses list to make sure that an extract will be generated in the 241237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // future. 241337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ScalarToTreeEntry.count(LI->getPointerOperand())) 241437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ExternalUses.push_back( 241537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ExternalUser(LI->getPointerOperand(), cast<User>(VecPtr), 0)); 241637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2417369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned Alignment = LI->getAlignment(); 2418369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem LI = Builder.CreateLoad(VecPtr); 24194c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!Alignment) { 2420de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Alignment = DL->getABITypeAlignment(ScalarLoadTy); 24214c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar } 2422369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem LI->setAlignment(Alignment); 2423369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = LI; 242437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2425fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling return propagateMetadata(LI, E->Scalars); 2426369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2427369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem case Instruction::Store: { 2428369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem StoreInst *SI = cast<StoreInst>(VL0); 2429369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned Alignment = SI->getAlignment(); 24309e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault unsigned AS = SI->getPointerAddressSpace(); 2431369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 2432369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ValueList ValueOp; 2433f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Value *V : E->Scalars) 2434f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar ValueOp.push_back(cast<StoreInst>(V)->getValueOperand()); 2435369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 24364b272493b7deaa139484b16ee45ae87a46ceef57Matt Arsenault setInsertPointAfterBundle(E->Scalars); 243779c6bee7a93b9f5ff9f6d5bf5aa82dea3acf0f6aNadav Rotem 2438369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *VecValue = vectorizeTree(ValueOp); 24399e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), 24409e93ba225f9f4d0cfab175b73d2a744d01104451Matt Arsenault VecTy->getPointerTo(AS)); 2441369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem StoreInst *S = Builder.CreateStore(VecValue, VecPtr); 244237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 244337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The pointer operand uses an in-tree scalar so we add the new BitCast to 244437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // ExternalUses list to make sure that an extract will be generated in the 244537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // future. 244637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ScalarToTreeEntry.count(SI->getPointerOperand())) 244737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ExternalUses.push_back( 244837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0)); 244937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 24504c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!Alignment) { 2451de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType()); 24524c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar } 2453369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem S->setAlignment(Alignment); 2454369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem E->VectorizedValue = S; 245537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2456fc1f9531d3f9bf14b4b20b80f158317795d3d1d8Bill Wendling return propagateMetadata(S, E->Scalars); 2457369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2458c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::GetElementPtr: { 2459c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines setInsertPointAfterBundle(E->Scalars); 2460c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2461c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ValueList Op0VL; 2462f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Value *V : E->Scalars) 2463f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Op0VL.push_back(cast<GetElementPtrInst>(V)->getOperand(0)); 2464c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2465c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *Op0 = vectorizeTree(Op0VL); 2466c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2467c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines std::vector<Value *> OpVecs; 2468c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e; 2469c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ++j) { 2470c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ValueList OpVL; 2471f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Value *V : E->Scalars) 2472f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar OpVL.push_back(cast<GetElementPtrInst>(V)->getOperand(j)); 2473c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2474c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *OpVec = vectorizeTree(OpVL); 2475c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines OpVecs.push_back(OpVec); 2476c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 2477c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 24784c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar Value *V = Builder.CreateGEP( 24794c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs); 2480c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines E->VectorizedValue = V; 248137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2482c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2483c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Instruction *I = dyn_cast<Instruction>(V)) 2484c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return propagateMetadata(I, E->Scalars); 2485c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2486c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return V; 2487c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 248836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case Instruction::Call: { 248936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CallInst *CI = cast<CallInst>(VL0); 249036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setInsertPointAfterBundle(E->Scalars); 2491c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Function *FI; 2492c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Intrinsic::ID IID = Intrinsic::not_intrinsic; 249337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Value *ScalarArg = nullptr; 2494c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (CI && (FI = CI->getCalledFunction())) { 24956948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar IID = FI->getIntrinsicID(); 2496c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 249736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines std::vector<Value *> OpVecs; 249836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) { 249936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ValueList OpVL; 2500c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // ctlz,cttz and powi are special intrinsics whose second argument is 2501c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // a scalar. This argument should not be vectorized. 2502c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) { 2503c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines CallInst *CEI = cast<CallInst>(E->Scalars[0]); 250437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScalarArg = CEI->getArgOperand(j); 2505c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines OpVecs.push_back(CEI->getArgOperand(j)); 2506c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines continue; 2507c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 2508f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Value *V : E->Scalars) { 2509f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar CallInst *CEI = cast<CallInst>(V); 251036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OpVL.push_back(CEI->getArgOperand(j)); 251136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 251236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 251336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Value *OpVec = vectorizeTree(OpVL); 251436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n"); 251536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OpVecs.push_back(OpVec); 251636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 251736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 251836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Module *M = F->getParent(); 2519de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); 252036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) }; 252136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Function *CF = Intrinsic::getDeclaration(M, ID, Tys); 2522de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVector<OperandBundleDef, 1> OpBundles; 2523de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar CI->getOperandBundlesAsDefs(OpBundles); 2524de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Value *V = Builder.CreateCall(CF, OpVecs, OpBundles); 252537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 252637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The scalar argument uses an in-tree scalar so we add the new vectorized 252737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // call to ExternalUses list to make sure that an extract will be 252837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // generated in the future. 252937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ScalarArg && ScalarToTreeEntry.count(ScalarArg)) 253037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0)); 253137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 253236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines E->VectorizedValue = V; 253337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 253436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return V; 253536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 2536c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case Instruction::ShuffleVector: { 2537c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ValueList LHSVL, RHSVL; 2538ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines assert(isa<BinaryOperator>(VL0) && "Invalid Shuffle Vector Operand"); 2539ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines reorderAltShuffleOperands(E->Scalars, LHSVL, RHSVL); 2540c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines setInsertPointAfterBundle(E->Scalars); 2541c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2542c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *LHS = vectorizeTree(LHSVL); 2543c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *RHS = vectorizeTree(RHSVL); 2544c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2545c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Value *V = alreadyVectorized(E->Scalars)) 2546c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return V; 2547c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2548c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Create a vector of LHS op1 RHS 2549c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines BinaryOperator *BinOp0 = cast<BinaryOperator>(VL0); 2550c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS); 2551c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2552c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Create a vector of LHS op2 RHS 2553c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Instruction *VL1 = cast<Instruction>(E->Scalars[1]); 2554c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines BinaryOperator *BinOp1 = cast<BinaryOperator>(VL1); 2555c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS); 2556c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 255737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Create shuffle to take alternate operations from the vector. 255837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Also, gather up odd and even scalar ops to propagate IR flags to 255937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // each vector operation. 256037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ValueList OddScalars, EvenScalars; 2561c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned e = E->Scalars.size(); 256237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallVector<Constant *, 8> Mask(e); 2563c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines for (unsigned i = 0; i < e; ++i) { 256437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (i & 1) { 2565c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Mask[i] = Builder.getInt32(e + i); 256637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines OddScalars.push_back(E->Scalars[i]); 256737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 2568c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Mask[i] = Builder.getInt32(i); 256937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines EvenScalars.push_back(E->Scalars[i]); 257037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 2571c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 2572c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2573c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *ShuffleMask = ConstantVector::get(Mask); 257437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines propagateIRFlags(V0, EvenScalars); 257537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines propagateIRFlags(V1, OddScalars); 2576c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2577c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask); 2578c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines E->VectorizedValue = V; 257937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++NumVectorInstructions; 2580c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Instruction *I = dyn_cast<Instruction>(V)) 2581c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return propagateMetadata(I, E->Scalars); 2582c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2583c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return V; 2584c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 2585369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem default: 2586369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem llvm_unreachable("unknown inst"); 258753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2588dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 2589369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 259053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2591a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold SchwaighoferValue *BoUpSLP::vectorizeTree() { 2592de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 259337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // All blocks must be scheduled before any instructions are inserted. 259437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto &BSIter : BlocksSchedules) { 259537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines scheduleBlock(BSIter.second.get()); 259637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 259737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2598f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Builder.SetInsertPoint(&F->getEntryBlock().front()); 2599de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *VectorRoot = vectorizeTree(&VectorizableTree[0]); 2600de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2601de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If the vectorized tree can be rewritten in a smaller type, we truncate the 2602de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // vectorized root. InstCombine will then rewrite the entire expression. We 2603de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // sign extend the extracted values below. 2604de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *ScalarRoot = VectorizableTree[0].Scalars[0]; 2605de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MinBWs.count(ScalarRoot)) { 2606de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (auto *I = dyn_cast<Instruction>(VectorRoot)) 2607de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Builder.SetInsertPoint(&*++BasicBlock::iterator(I)); 2608de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto BundleWidth = VectorizableTree[0].Scalars.size(); 2609de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]); 2610de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *VecTy = VectorType::get(MinTy, BundleWidth); 2611de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy); 2612de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar VectorizableTree[0].VectorizedValue = Trunc; 2613de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 261453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2615a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n"); 2616a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2617a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Extract all of the elements with the external uses. 2618de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (const auto &ExternalUse : ExternalUses) { 2619de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Value *Scalar = ExternalUse.Scalar; 2620de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar llvm::User *User = ExternalUse.User; 2621523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem 2622523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem // Skip users that we already RAUW. This happens when one instruction 2623523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem // has multiple uses of the same value. 262436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (std::find(Scalar->user_begin(), Scalar->user_end(), User) == 262536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Scalar->user_end()) 2626a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem continue; 2627a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar"); 2628a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2629a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem int Idx = ScalarToTreeEntry[Scalar]; 2630a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem TreeEntry *E = &VectorizableTree[Idx]; 2631a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem assert(!E->NeedToGather && "Extracting from a gather list"); 2632a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2633a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem Value *Vec = E->VectorizedValue; 2634a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem assert(Vec && "Can't find vectorizable value"); 2635a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2636de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Value *Lane = Builder.getInt32(ExternalUse.Lane); 2637a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Generate extracts for out-of-tree users. 2638a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem // Find the insertion point for the extractelement lane. 2639de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (auto *VecI = dyn_cast<Instruction>(Vec)) { 2640523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem if (PHINode *PH = dyn_cast<PHINode>(User)) { 2641523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) { 2642523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem if (PH->getIncomingValue(i) == Scalar) { 2643de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TerminatorInst *IncomingTerminator = 2644de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PH->getIncomingBlock(i)->getTerminator(); 2645de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isa<CatchSwitchInst>(IncomingTerminator)) { 2646de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Builder.SetInsertPoint(VecI->getParent(), 2647de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar std::next(VecI->getIterator())); 2648de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } else { 2649de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator()); 2650de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2651f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem Value *Ex = Builder.CreateExtractElement(Vec, Lane); 2652de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MinBWs.count(ScalarRoot)) 2653de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Ex = Builder.CreateSExt(Ex, Scalar->getType()); 2654a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling CSEBlocks.insert(PH->getIncomingBlock(i)); 2655f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem PH->setOperand(i, Ex); 2656523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem } 2657523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem } 2658523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem } else { 2659f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem Builder.SetInsertPoint(cast<Instruction>(User)); 2660f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem Value *Ex = Builder.CreateExtractElement(Vec, Lane); 2661de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MinBWs.count(ScalarRoot)) 2662de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Ex = Builder.CreateSExt(Ex, Scalar->getType()); 2663a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling CSEBlocks.insert(cast<Instruction>(User)->getParent()); 2664f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem User->replaceUsesOfWith(Scalar, Ex); 2665523cd85b50c3081d9859fe41afadce13c43e1bc9Nadav Rotem } 2666a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } else { 2667f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Builder.SetInsertPoint(&F->getEntryBlock().front()); 2668f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem Value *Ex = Builder.CreateExtractElement(Vec, Lane); 2669de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MinBWs.count(ScalarRoot)) 2670de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Ex = Builder.CreateSExt(Ex, Scalar->getType()); 2671a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling CSEBlocks.insert(&F->getEntryBlock()); 2672f2855f88864cb00c774114b80b78f2388821b8caNadav Rotem User->replaceUsesOfWith(Scalar, Ex); 2673a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 2674a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2675a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); 2676a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem } 2677a8608b8dfd79756765862cf40aeab544cd10c188Nadav Rotem 2678369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // For each vectorized value: 2679de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (TreeEntry &EIdx : VectorizableTree) { 2680de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TreeEntry *Entry = &EIdx; 268153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2682369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // For each lane: 2683369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { 2684369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *Scalar = Entry->Scalars[Lane]; 2685369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // No need to handle users of gathered values. 2686369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Entry->NeedToGather) 2687369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem continue; 268853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2689ace9ed50b549667eff8e19eb76f7714a3a6161aeNadav Rotem assert(Entry->VectorizedValue && "Can't find vectorizable value"); 2690ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem 2691369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Type *Ty = Scalar->getType(); 2692369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!Ty->isVoidTy()) { 269336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#ifndef NDEBUG 269436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (User *U : Scalar->users()) { 269536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); 2696a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 269736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines assert((ScalarToTreeEntry.count(U) || 2698dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // It is legal to replace users in the ignorelist by undef. 2699dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) != 2700dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines UserIgnoreList.end())) && 2701369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem "Replacing out-of-tree value with undef"); 2702369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 270336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#endif 2704369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Value *Undef = UndefValue::get(Ty); 2705369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Scalar->replaceAllUsesWith(Undef); 2706369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 2707369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); 2708ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines eraseInstruction(cast<Instruction>(Scalar)); 2709ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem } 2710ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem } 2711ac2678684689448d1c88f644f3fa88aa4f93997cNadav Rotem 2712c7ffbc019fdc6ae5265f1841eaabae34e301f59bNadav Rotem Builder.ClearInsertionPoint(); 2713a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 2714a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return VectorizableTree[0].VectorizedValue; 271553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem} 271653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 2717369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotemvoid BoUpSLP::optimizeGatherSequence() { 2718369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() 2719369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem << " gather sequences instructions.\n"); 27206959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem // LICM InsertElementInst sequences. 2721de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Instruction *it : GatherSeq) { 2722de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar InsertElementInst *Insert = dyn_cast<InsertElementInst>(it); 272353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 272453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (!Insert) 272553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem continue; 272653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 272753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // Check if this block is inside a loop. 27286959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem Loop *L = LI->getLoopFor(Insert->getParent()); 272953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (!L) 27306959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem continue; 273153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 273253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // Check if it has a preheader. 273353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem BasicBlock *PreHeader = L->getLoopPreheader(); 273453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (!PreHeader) 273529acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem continue; 273653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 273753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // If the vector or the element that we insert into it are 273853a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // instructions that are defined in this basic block then we can't 273953a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // hoist this instruction. 274053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0)); 274153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1)); 274253a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (CurrVec && L->contains(CurrVec)) 274353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem continue; 274453a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem if (NewElem && L->contains(NewElem)) 274553a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem continue; 274653a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 274753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem // We can hoist this instruction. Move it to the pre-header. 27486959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem Insert->moveBefore(PreHeader->getTerminator()); 27496959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem } 27506959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem 2751dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Make a list of all reachable blocks in our CSE queue. 2752dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVector<const DomTreeNode *, 8> CSEWorkList; 2753dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CSEWorkList.reserve(CSEBlocks.size()); 2754dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (BasicBlock *BB : CSEBlocks) 2755dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (DomTreeNode *N = DT->getNode(BB)) { 2756dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(DT->isReachableFromEntry(N)); 2757dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CSEWorkList.push_back(N); 2758dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 2759dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 27600c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer // Sort blocks by domination. This ensures we visit a block after all blocks 27610c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer // dominating it are visited. 276236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), 2763dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines [this](const DomTreeNode *A, const DomTreeNode *B) { 276436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return DT->properlyDominates(A, B); 276536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines }); 27660c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer 27676959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem // Perform O(N^2) search over the gather sequences and merge identical 27686959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem // instructions. TODO: We can further optimize this scan if we split the 27696959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem // instructions into different buckets based on the insert lane. 27700c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer SmallVector<Instruction *, 16> Visited; 2771dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) { 277236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) && 27730c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer "Worklist not sorted properly!"); 2774dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BasicBlock *BB = (*I)->getBlock(); 27750c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer // For all instructions in blocks containing gather sequences: 27760c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { 2777f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Instruction *In = &*it++; 2778a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) 27796959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem continue; 27806959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem 278129acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem // Check if we can replace this instruction with any of the 278229acf7e03af9b5524daa1e7523e0296cc766ff24Nadav Rotem // visited instructions. 2783de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Instruction *v : Visited) { 2784de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (In->isIdenticalTo(v) && 2785de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DT->dominates(v->getParent(), In->getParent())) { 2786de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar In->replaceAllUsesWith(v); 2787ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines eraseInstruction(In); 2788dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines In = nullptr; 27896959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem break; 27906959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem } 27916959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem } 27920c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer if (In) { 27930c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer assert(std::find(Visited.begin(), Visited.end(), In) == Visited.end()); 27940c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer Visited.push_back(In); 27950c7ba3cef2d99bf15175303d5e2523fe898d009dBenjamin Kramer } 27966959f08f445ba8e0f7f37fc2dff4c4a51964f962Nadav Rotem } 279753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem } 2798a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling CSEBlocks.clear(); 2799a0d44fe4cd92c11466b82af4f5089af845a2eeb5Bill Wendling GatherSeq.clear(); 280053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem} 280153a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 280237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// Groups the instructions to a bundle (which is then a single scheduling entity) 280337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// and schedules instructions until the bundle gets ready. 280437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, 2805ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BoUpSLP *SLP) { 280637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isa<PHINode>(VL[0])) 280737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 280837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 280937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Initialize the instruction bundle. 281037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *OldScheduleEnd = ScheduleEnd; 281137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *PrevInBundle = nullptr; 281237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *Bundle = nullptr; 281337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool ReSchedule = false; 281437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n"); 2815f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2816f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Make sure that the scheduling region contains all 2817f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // instructions of the bundle. 2818f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Value *V : VL) { 2819f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!extendSchedulingRegion(V)) 2820f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 2821f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2822f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 282337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (Value *V : VL) { 282437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *BundleMember = getScheduleData(V); 282537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(BundleMember && 282637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "no ScheduleData for bundle member (maybe not in same basic block)"); 282737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (BundleMember->IsScheduled) { 282837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // A bundle member was scheduled as single instruction before and now 282937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // needs to be scheduled as part of the bundle. We just get rid of the 283037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // existing schedule. 283137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: reset schedule because " << *BundleMember 283237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines << " was already scheduled\n"); 283337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReSchedule = true; 283437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 283537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(BundleMember->isSchedulingEntity() && 283637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "bundle member already part of other bundle"); 283737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (PrevInBundle) { 283837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PrevInBundle->NextInBundle = BundleMember; 283937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 284037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Bundle = BundleMember; 284137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 284237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->UnscheduledDepsInBundle = 0; 284337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps; 284437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 284537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Group the instructions to a bundle. 284637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->FirstInBundle = Bundle; 284737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PrevInBundle = BundleMember; 284837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 284937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ScheduleEnd != OldScheduleEnd) { 285037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The scheduling region got new instructions at the lower end (or it is a 285137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // new region for the first bundle). This makes it necessary to 285237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // recalculate all dependencies. 285337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // It is seldom that this needs to be done a second time after adding the 285437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // initial bundle to the region. 285537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) { 285637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = getScheduleData(I); 285737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->clearDependencies(); 285837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 285937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReSchedule = true; 286037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 286137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ReSchedule) { 286237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines resetSchedule(); 286337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines initialFillReadyList(ReadyInsts); 286437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 286537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 286637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block " 286737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines << BB->getName() << "\n"); 286837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2869ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines calculateDependencies(Bundle, true, SLP); 287037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 287137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Now try to schedule the new bundle. As soon as the bundle is "ready" it 287237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // means that there are no cyclic dependencies and we can schedule it. 287337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Note that's important that we don't "schedule" the bundle yet (see 287437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // cancelScheduling). 287537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (!Bundle->isReady() && !ReadyInsts.empty()) { 287637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 287737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *pickedSD = ReadyInsts.back(); 287837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.pop_back(); 287937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 288037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (pickedSD->isSchedulingEntity() && pickedSD->isReady()) { 288137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines schedule(pickedSD, ReadyInsts); 288237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 288337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 2884f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!Bundle->isReady()) { 2885f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar cancelScheduling(VL); 2886f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 2887f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2888f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return true; 288937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 289037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 289137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) { 289237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isa<PHINode>(VL[0])) 289337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 289437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 289537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *Bundle = getScheduleData(VL[0]); 289637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n"); 289737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!Bundle->IsScheduled && 289837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "Can't cancel bundle which is already scheduled"); 289937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() && 290037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "tried to unbundle something which is not a bundle"); 290137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 290237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Un-bundle: make single instructions out of the bundle. 290337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *BundleMember = Bundle; 290437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (BundleMember) { 290537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(BundleMember->FirstInBundle == Bundle && "corrupt bundle links"); 290637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->FirstInBundle = BundleMember; 290737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *Next = BundleMember->NextInBundle; 290837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->NextInBundle = nullptr; 290937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->UnscheduledDepsInBundle = BundleMember->UnscheduledDeps; 291037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (BundleMember->UnscheduledDepsInBundle == 0) { 291137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.insert(BundleMember); 291237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 291337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember = Next; 291437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 291537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 291637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2917f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarbool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) { 291837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (getScheduleData(V)) 2919f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return true; 292037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *I = dyn_cast<Instruction>(V); 292137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(I && "bundle member must be an instruction"); 292237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled"); 292337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!ScheduleStart) { 292437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // It's the first instruction in the new region. 292537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines initScheduleData(I, I->getNextNode(), nullptr, nullptr); 292637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleStart = I; 292737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleEnd = I->getNextNode(); 292837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(ScheduleEnd && "tried to vectorize a TerminatorInst?"); 292937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n"); 2930f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return true; 293137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 293237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Search up and down at the same time, because we don't know if the new 293337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // instruction is above or below the existing scheduling region. 2934f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BasicBlock::reverse_iterator UpIter(ScheduleStart->getIterator()); 293537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BasicBlock::reverse_iterator UpperEnd = BB->rend(); 293637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BasicBlock::iterator DownIter(ScheduleEnd); 293737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BasicBlock::iterator LowerEnd = BB->end(); 293837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (;;) { 2939f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (++ScheduleRegionSize > ScheduleRegionSizeLimit) { 2940f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n"); 2941f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 2942f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2943f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 294437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (UpIter != UpperEnd) { 294537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (&*UpIter == I) { 294637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion); 294737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleStart = I; 294837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n"); 2949f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return true; 295037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 295137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines UpIter++; 295237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 295337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (DownIter != LowerEnd) { 295437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (&*DownIter == I) { 295537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion, 295637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines nullptr); 295737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleEnd = I->getNextNode(); 295837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(ScheduleEnd && "tried to vectorize a TerminatorInst?"); 295937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n"); 2960f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return true; 296137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 296237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DownIter++; 296337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 296437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert((UpIter != UpperEnd || DownIter != LowerEnd) && 296537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "instruction not found in block"); 296637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 2967f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return true; 296837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 296937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 297037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI, 297137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *ToI, 297237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *PrevLoadStore, 297337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *NextLoadStore) { 297437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *CurrentLoadStore = PrevLoadStore; 297537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) { 297637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = ScheduleDataMap[I]; 297737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!SD) { 297837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Allocate a new ScheduleData for the instruction. 297937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ChunkPos >= ChunkSize) { 298037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleDataChunks.push_back( 298137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines llvm::make_unique<ScheduleData[]>(ChunkSize)); 298237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ChunkPos = 0; 298337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 298437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD = &(ScheduleDataChunks.back()[ChunkPos++]); 298537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleDataMap[I] = SD; 298637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->Inst = I; 298737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 298837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(!isInSchedulingRegion(SD) && 298937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "new ScheduleData already in scheduling region"); 299037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->init(SchedulingRegionID); 299137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 299237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (I->mayReadOrWriteMemory()) { 299337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Update the linked list of memory accessing instructions. 299437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (CurrentLoadStore) { 299537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines CurrentLoadStore->NextLoadStore = SD; 299637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 299737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines FirstLoadStoreInRegion = SD; 299837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 299937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines CurrentLoadStore = SD; 300037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 300137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 300237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (NextLoadStore) { 300337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (CurrentLoadStore) 300437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines CurrentLoadStore->NextLoadStore = NextLoadStore; 300537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 300637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LastLoadStoreInRegion = CurrentLoadStore; 300737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 300837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 300937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 301037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, 301137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool InsertInReadyList, 3012ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BoUpSLP *SLP) { 301337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(SD->isSchedulingEntity()); 301437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 301537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallVector<ScheduleData *, 10> WorkList; 301637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines WorkList.push_back(SD); 301737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 301837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (!WorkList.empty()) { 301937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = WorkList.back(); 302037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines WorkList.pop_back(); 302137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 302237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *BundleMember = SD; 302337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (BundleMember) { 302437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(isInSchedulingRegion(BundleMember)); 302537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!BundleMember->hasValidDependencies()) { 302637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 302737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: update deps of " << *BundleMember << "\n"); 302837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->Dependencies = 0; 302937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->resetUnscheduledDeps(); 303037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 303137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Handle def-use chain dependencies. 303237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (User *U : BundleMember->Inst->users()) { 303337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isa<Instruction>(U)) { 303437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *UseSD = getScheduleData(U); 303537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) { 303637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->Dependencies++; 303737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *DestBundle = UseSD->FirstInBundle; 303837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!DestBundle->IsScheduled) { 303937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->incrementUnscheduledDeps(1); 304037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 304137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!DestBundle->hasValidDependencies()) { 304237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines WorkList.push_back(DestBundle); 304337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 304437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 304537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 304637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // I'm not sure if this can ever happen. But we need to be safe. 3047f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // This lets the instruction/bundle never be scheduled and 3048f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // eventually disable vectorization. 304937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->Dependencies++; 305037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember->incrementUnscheduledDeps(1); 305137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 305237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 305337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 305437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Handle the memory dependencies. 305537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *DepDest = BundleMember->NextLoadStore; 305637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (DepDest) { 3057ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Instruction *SrcInst = BundleMember->Inst; 3058f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA); 305937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory(); 3060ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned numAliased = 0; 3061ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned DistToSrc = 1; 306237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 306337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (DepDest) { 306437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(isInSchedulingRegion(DepDest)); 3065ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 3066ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // We have two limits to reduce the complexity: 3067ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // 1) AliasedCheckLimit: It's a small limit to reduce calls to 3068ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // SLP->isAliased (which is the expensive part in this loop). 3069ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // 2) MaxMemDepDistance: It's for very large blocks and it aborts 3070ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // the whole loop (even if the loop is fast, it's quadratic). 3071ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // It's important for the loop break condition (see below) to 3072ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // check this limit even between two read-only instructions. 3073ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (DistToSrc >= MaxMemDepDistance || 3074ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) && 3075ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (numAliased >= AliasedCheckLimit || 3076ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) { 3077ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 3078ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // We increment the counter only if the locations are aliased 3079ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // (instead of counting all alias checks). This gives a better 3080ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // balance between reduced runtime and accurate dependencies. 3081ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines numAliased++; 3082ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 3083ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines DepDest->MemoryDependencies.push_back(BundleMember); 3084ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BundleMember->Dependencies++; 3085ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines ScheduleData *DestBundle = DepDest->FirstInBundle; 3086ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!DestBundle->IsScheduled) { 3087ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BundleMember->incrementUnscheduledDeps(1); 3088ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 3089ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!DestBundle->hasValidDependencies()) { 3090ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines WorkList.push_back(DestBundle); 309137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 309237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 309337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DepDest = DepDest->NextLoadStore; 3094ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 3095ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Example, explaining the loop break condition: Let's assume our 3096ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // starting instruction is i0 and MaxMemDepDistance = 3. 3097ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // 3098ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // +--------v--v--v 3099ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // i0,i1,i2,i3,i4,i5,i6,i7,i8 3100ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // +--------^--^--^ 3101ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // 3102ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // MaxMemDepDistance let us stop alias-checking at i3 and we add 3103ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // dependencies from i0 to i3,i4,.. (even if they are not aliased). 3104ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Previously we already added dependencies from i3 to i6,i7,i8 3105ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // (because of MaxMemDepDistance). As we added a dependency from 3106ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8 3107ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // and we can abort this loop at i6. 3108ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (DistToSrc >= 2 * MaxMemDepDistance) 3109ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 3110ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines DistToSrc++; 311137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 311237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 311337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 311437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember = BundleMember->NextInBundle; 311537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 311637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (InsertInReadyList && SD->isReady()) { 311737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.push_back(SD); 311837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: gets ready on update: " << *SD->Inst << "\n"); 311937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 312037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 312137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 312237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 312337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::BlockScheduling::resetSchedule() { 312437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(ScheduleStart && 312537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "tried to reset schedule on block which has not been scheduled"); 312637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (Instruction *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) { 312737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = getScheduleData(I); 312837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(isInSchedulingRegion(SD)); 312937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->IsScheduled = false; 313037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->resetUnscheduledDeps(); 313137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 313237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.clear(); 313337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 313437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 313537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid BoUpSLP::scheduleBlock(BlockScheduling *BS) { 3136de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 313737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!BS->ScheduleStart) 313837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 3139de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 314037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n"); 314137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 314237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->resetSchedule(); 314337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 314437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // For the real scheduling we use a more sophisticated ready-list: it is 314537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // sorted by the original instruction location. This lets the final schedule 314637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // be as close as possible to the original instruction order. 314737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines struct ScheduleDataCompare { 314837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool operator()(ScheduleData *SD1, ScheduleData *SD2) { 314937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return SD2->SchedulingPriority < SD1->SchedulingPriority; 315037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 315137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines }; 315237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts; 315337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 3154f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Ensure that all dependency data is updated and fill the ready-list with 315537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // initial instructions. 315637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Idx = 0; 315737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int NumToSchedule = 0; 315837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd; 315937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines I = I->getNextNode()) { 316037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *SD = BS->getScheduleData(I); 316137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert( 316237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->isPartOfBundle() == (ScalarToTreeEntry.count(SD->Inst) != 0) && 316337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "scheduler and vectorizer have different opinion on what is a bundle"); 316437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SD->FirstInBundle->SchedulingPriority = Idx++; 316537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (SD->isSchedulingEntity()) { 3166ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BS->calculateDependencies(SD, false, this); 316737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NumToSchedule++; 316837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 316937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 317037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->initialFillReadyList(ReadyInsts); 317137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 317237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *LastScheduledInst = BS->ScheduleEnd; 317337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 317437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Do the "real" scheduling. 317537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (!ReadyInsts.empty()) { 317637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *picked = *ReadyInsts.begin(); 317737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ReadyInsts.erase(ReadyInsts.begin()); 317837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 317937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Move the scheduled instruction(s) to their dedicated places, if not 318037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // there yet. 318137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ScheduleData *BundleMember = picked; 318237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (BundleMember) { 318337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Instruction *pickedInst = BundleMember->Inst; 318437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (LastScheduledInst->getNextNode() != pickedInst) { 318537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->BB->getInstList().remove(pickedInst); 3186f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BS->BB->getInstList().insert(LastScheduledInst->getIterator(), 3187f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar pickedInst); 318837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 318937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LastScheduledInst = pickedInst; 319037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BundleMember = BundleMember->NextInBundle; 319137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 319237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 319337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->schedule(picked, ReadyInsts); 319437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines NumToSchedule--; 319537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 319637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(NumToSchedule == 0 && "could not schedule all instructions"); 319737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 319837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Avoid duplicate scheduling of the block. 319937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BS->ScheduleStart = nullptr; 320037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 320137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 3202de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned BoUpSLP::getVectorElementSize(Value *V) { 3203de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If V is a store, just return the width of the stored value without 3204de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // traversing the expression tree. This is the common case. 3205de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (auto *Store = dyn_cast<StoreInst>(V)) 3206de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return DL->getTypeSizeInBits(Store->getValueOperand()->getType()); 3207de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3208de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If V is not a store, we can traverse the expression tree to find loads 3209de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // that feed it. The type of the loaded value may indicate a more suitable 3210de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // width than V's type. We want to base the vector element size on the width 3211de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // of memory operations where possible. 3212de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVector<Instruction *, 16> Worklist; 3213de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallPtrSet<Instruction *, 16> Visited; 3214de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (auto *I = dyn_cast<Instruction>(V)) 3215de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Worklist.push_back(I); 3216de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3217de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Traverse the expression tree in bottom-up order looking for loads. If we 3218de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // encounter an instruciton we don't yet handle, we give up. 3219de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto MaxWidth = 0u; 3220de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto FoundUnknownInst = false; 3221de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar while (!Worklist.empty() && !FoundUnknownInst) { 3222de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *I = Worklist.pop_back_val(); 3223de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Visited.insert(I); 3224de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3225de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We should only be looking at scalar instructions here. If the current 3226de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // instruction has a vector type, give up. 3227de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *Ty = I->getType(); 3228de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isa<VectorType>(Ty)) 3229de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar FoundUnknownInst = true; 3230de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3231de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If the current instruction is a load, update MaxWidth to reflect the 3232de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // width of the loaded value. 3233de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar else if (isa<LoadInst>(I)) 3234de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MaxWidth = std::max<unsigned>(MaxWidth, DL->getTypeSizeInBits(Ty)); 3235de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3236de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Otherwise, we need to visit the operands of the instruction. We only 3237de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // handle the interesting cases from buildTree here. If an operand is an 3238de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // instruction we haven't yet visited, we add it to the worklist. 3239de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar else if (isa<PHINode>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) || 3240de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar isa<CmpInst>(I) || isa<SelectInst>(I) || isa<BinaryOperator>(I)) { 3241de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Use &U : I->operands()) 3242de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (auto *J = dyn_cast<Instruction>(U.get())) 3243de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!Visited.count(J)) 3244de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Worklist.push_back(J); 3245de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 32468383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3247de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If we don't yet handle the instruction, give up. 3248de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar else 3249de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar FoundUnknownInst = true; 3250de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 32518383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3252de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If we didn't encounter a memory access in the expression tree, or if we 3253de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // gave up for some reason, just return the width of V. 3254de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!MaxWidth || FoundUnknownInst) 3255de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return DL->getTypeSizeInBits(V->getType()); 3256de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3257de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Otherwise, return the maximum width we found. 3258de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return MaxWidth; 3259de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 3260de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3261de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// Determine if a value V in a vectorizable expression Expr can be demoted to a 3262de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// smaller type with a truncation. We collect the values that will be demoted 3263de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// in ToDemote and additional roots that require investigating in Roots. 3264de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr, 3265de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVectorImpl<Value *> &ToDemote, 3266de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVectorImpl<Value *> &Roots) { 3267de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3268de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We can always demote constants. 3269de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isa<Constant>(V)) { 3270de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ToDemote.push_back(V); 3271de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return true; 32728383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem } 32738383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3274de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If the value is not an instruction in the expression with only one use, it 3275de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // cannot be demoted. 3276de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *I = dyn_cast<Instruction>(V); 3277de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!I || !I->hasOneUse() || !Expr.count(I)) 3278de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 3279e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3280de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar switch (I->getOpcode()) { 3281de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3282de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We can always demote truncations and extensions. Since truncations can 3283de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // seed additional demotion, we save the truncated value. 3284de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::Trunc: 3285de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Roots.push_back(I->getOperand(0)); 3286de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::ZExt: 3287de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::SExt: 3288de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar break; 3289de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3290de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We can demote certain binary operations if we can demote both of their 3291de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // operands. 3292de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::Add: 3293de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::Sub: 3294de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::Mul: 3295de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::And: 3296de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::Or: 3297de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::Xor: 3298de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!collectValuesToDemote(I->getOperand(0), Expr, ToDemote, Roots) || 3299de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar !collectValuesToDemote(I->getOperand(1), Expr, ToDemote, Roots)) 330036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 3301de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar break; 330236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 3303de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We can demote selects if we can demote their true and false values. 3304de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::Select: { 3305de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SelectInst *SI = cast<SelectInst>(I); 3306de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!collectValuesToDemote(SI->getTrueValue(), Expr, ToDemote, Roots) || 3307de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar !collectValuesToDemote(SI->getFalseValue(), Expr, ToDemote, Roots)) 3308d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton return false; 3309de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar break; 3310de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3311d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21aRobert Lytton 3312de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We can demote phis if we can demote all their incoming operands. Note that 3313de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // we don't need to worry about cycles since we ensure single use above. 3314de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case Instruction::PHI: { 3315de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PHINode *PN = cast<PHINode>(I); 3316de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Value *IncValue : PN->incoming_values()) 3317de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!collectValuesToDemote(IncValue, Expr, ToDemote, Roots)) 3318de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 3319de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar break; 3320de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3321f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 3322de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Otherwise, conservatively give up. 3323de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar default: 3324de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 3325de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 33263202f6cdb9193fe5365462118f499f6e164a1738Nadav Rotem 3327de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Record the value that we can demote. 3328de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ToDemote.push_back(V); 3329de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return true; 3330de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 333109ec4b21648700f9d4ef5bc90d732f90f32c930cNadav Rotem 3332de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid BoUpSLP::computeMinimumValueSizes() { 3333de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If there are no external uses, the expression tree must be rooted by a 3334de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // store. We can't demote in-memory values, so there is nothing to do here. 3335de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (ExternalUses.empty()) 3336de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return; 3337ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 3338de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We only attempt to truncate integer expressions. 3339de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto &TreeRoot = VectorizableTree[0].Scalars; 3340de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType()); 3341de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!TreeRootIT) 3342de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return; 334353a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 3344de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If the expression is not rooted by a store, these roots should have 3345de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // external uses. We will rely on InstCombine to rewrite the expression in 3346de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // the narrower type. However, InstCombine only rewrites single-use values. 3347de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // This means that if a tree entry other than a root is used externally, it 3348de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // must have multiple uses and InstCombine will not rewrite it. The code 3349de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // below ensures that only the roots are used externally. 3350de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallPtrSet<Value *, 32> Expr(TreeRoot.begin(), TreeRoot.end()); 3351de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto &EU : ExternalUses) 3352de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!Expr.erase(EU.Scalar)) 3353de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return; 3354de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!Expr.empty()) 3355de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return; 33566611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem 3357de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Collect the scalar values of the vectorizable expression. We will use this 3358de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // context to determine which values can be demoted. If we see a truncation, 3359de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // we mark it as seeding another demotion. 3360de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto &Entry : VectorizableTree) 3361de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Expr.insert(Entry.Scalars.begin(), Entry.Scalars.end()); 3362e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3363de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Ensure the roots of the vectorizable tree don't form a cycle. They must 3364de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // have a single external user that is not in the vectorizable tree. 3365de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto *Root : TreeRoot) 3366de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!Root->hasOneUse() || Expr.count(*Root->user_begin())) 3367de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return; 3368de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3369de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Conservatively determine if we can actually truncate the roots of the 3370de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // expression. Collect the values that can be demoted in ToDemote and 3371de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // additional roots that require investigating in Roots. 3372de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVector<Value *, 32> ToDemote; 3373de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVector<Value *, 4> Roots; 3374de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto *Root : TreeRoot) 3375de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!collectValuesToDemote(Root, Expr, ToDemote, Roots)) 3376de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return; 3377de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3378de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // The maximum bit width required to represent all the values that can be 3379de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // demoted without loss of precision. It would be safe to truncate the roots 3380de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // of the expression to this width. 3381de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto MaxBitWidth = 8u; 3382de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3383de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We first check if all the bits of the roots are demanded. If they're not, 3384de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // we can truncate the roots to this narrower type. 3385de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto *Root : TreeRoot) { 3386de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto Mask = DB->getDemandedBits(cast<Instruction>(Root)); 3387de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MaxBitWidth = std::max<unsigned>( 3388de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Mask.getBitWidth() - Mask.countLeadingZeros(), MaxBitWidth); 3389de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3390de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3391de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If all the bits of the roots are demanded, we can try a little harder to 3392de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // compute a narrower type. This can happen, for example, if the roots are 3393de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // getelementptr indices. InstCombine promotes these indices to the pointer 3394de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // width. Thus, all their bits are technically demanded even though the 3395de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // address computation might be vectorized in a smaller type. 3396de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // 3397de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We start by looking at each entry that can be demoted. We compute the 3398de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // maximum bit width required to store the scalar by using ValueTracking to 3399de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // compute the number of high-order bits we can truncate. 3400de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType())) { 3401de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MaxBitWidth = 8u; 3402de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto *Scalar : ToDemote) { 3403de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, 0, DT); 3404de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType()); 3405de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth); 3406e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 3407de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3408de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3409de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Round MaxBitWidth up to the next power-of-two. 3410de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!isPowerOf2_64(MaxBitWidth)) 3411de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MaxBitWidth = NextPowerOf2(MaxBitWidth); 3412de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3413de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If the maximum bit width we compute is less than the with of the roots' 3414de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // type, we can proceed with the narrowing. Otherwise, do nothing. 3415de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MaxBitWidth >= TreeRootIT->getBitWidth()) 3416de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return; 3417de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3418de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If we can truncate the root, we must collect additional values that might 3419de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // be demoted as a result. That is, those seeded by truncations we will 3420de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // modify. 3421de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar while (!Roots.empty()) 3422de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots); 3423de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3424de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Finally, map the values we can demote to the maximum bit with we computed. 3425de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto *Scalar : ToDemote) 3426de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MinBWs[Scalar] = MaxBitWidth; 3427de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 3428de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3429de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarnamespace { 3430de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// The SLPVectorizer Pass. 3431de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstruct SLPVectorizer : public FunctionPass { 3432de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SLPVectorizerPass Impl; 3433de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3434de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// Pass identification, replacement for typeid 3435de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar static char ID; 3436de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3437de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar explicit SLPVectorizer() : FunctionPass(ID) { 3438de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar initializeSLPVectorizerPass(*PassRegistry::getPassRegistry()); 3439de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3440de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3441de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3442de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool doInitialization(Module &M) override { 3443de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 3444de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3445de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3446de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool runOnFunction(Function &F) override { 3447de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (skipFunction(F)) 3448de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 3449de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3450de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 3451de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 3452de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 3453de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; 3454de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 3455de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 3456de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 3457de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 3458de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits(); 3459de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3460de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB); 3461e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 3462e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 346336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void getAnalysisUsage(AnalysisUsage &AU) const override { 3464e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem FunctionPass::getAnalysisUsage(AU); 3465ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AU.addRequired<AssumptionCacheTracker>(); 3466f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AU.addRequired<ScalarEvolutionWrapperPass>(); 3467f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AU.addRequired<AAResultsWrapperPass>(); 3468ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AU.addRequired<TargetTransformInfoWrapperPass>(); 3469ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AU.addRequired<LoopInfoWrapperPass>(); 347036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AU.addRequired<DominatorTreeWrapperPass>(); 3471de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AU.addRequired<DemandedBitsWrapperPass>(); 3472ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AU.addPreserved<LoopInfoWrapperPass>(); 347336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AU.addPreserved<DominatorTreeWrapperPass>(); 3474f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AU.addPreserved<AAResultsWrapperPass>(); 3475f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AU.addPreserved<GlobalsAAWrapperPass>(); 3476d4a9ebc7341a1ed066fcdff8e7e4e9cbf1bc4368Nadav Rotem AU.setPreservesCFG(); 3477e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 3478de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar}; 3479de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} // end anonymous namespace 3480e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3481de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarPreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) { 3482de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F); 3483de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *TTI = &AM.getResult<TargetIRAnalysis>(F); 3484de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F); 3485de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *AA = &AM.getResult<AAManager>(F); 3486de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *LI = &AM.getResult<LoopAnalysis>(F); 3487de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); 3488de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *AC = &AM.getResult<AssumptionAnalysis>(F); 3489de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *DB = &AM.getResult<DemandedBitsAnalysis>(F); 3490de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3491de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB); 3492de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!Changed) 3493de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return PreservedAnalyses::all(); 3494de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PreservedAnalyses PA; 3495de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PA.preserve<LoopAnalysis>(); 3496de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PA.preserve<DominatorTreeAnalysis>(); 3497de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PA.preserve<AAManager>(); 3498de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PA.preserve<GlobalsAA>(); 3499de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return PA; 3500de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 35018383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3502de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, 3503de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetTransformInfo *TTI_, 3504de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetLibraryInfo *TLI_, AliasAnalysis *AA_, 3505de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LoopInfo *LI_, DominatorTree *DT_, 3506de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AssumptionCache *AC_, DemandedBits *DB_) { 3507de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SE = SE_; 3508de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TTI = TTI_; 3509de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TLI = TLI_; 3510de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AA = AA_; 3511de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LI = LI_; 3512de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DT = DT_; 3513de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AC = AC_; 3514de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DB = DB_; 3515de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DL = &F.getParent()->getDataLayout(); 3516de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3517de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Stores.clear(); 3518de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar GEPs.clear(); 3519de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool Changed = false; 35208383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3521de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If the target claims to have no vector registers don't attempt 3522de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // vectorization. 3523de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!TTI->getNumberOfRegisters(true)) 3524de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 35258383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3526de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Don't vectorize when the attribute NoImplicitFloat is used. 3527de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (F.hasFnAttribute(Attribute::NoImplicitFloat)) 3528de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 3529444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem 3530de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); 35318383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3532de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Use the bottom up slp vectorizer to construct chains that start with 3533de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // store instructions. 3534de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL); 3535d69d9f20bc3acee0fc233853745c1de015b541f2Nadav Rotem 3536de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to 3537de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // delete instructions. 3538369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3539de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Scan the blocks in the function in post order. 3540de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto BB : post_order(&F.getEntryBlock())) { 3541de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar collectSeedInstructions(BB); 3542e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3543de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Vectorize trees that end at stores. 3544de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!Stores.empty()) { 3545de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "SLP: Found stores for " << Stores.size() 3546de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar << " underlying objects.\n"); 3547de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Changed |= vectorizeStoreChains(R); 3548de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3549de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3550de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Vectorize trees that end at reductions. 3551de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Changed |= vectorizeChainsInBlock(BB, R); 3552de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3553de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Vectorize the index computations of getelementptr instructions. This 3554de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // is primarily intended to catch gather-like idioms ending at 3555de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // non-consecutive loads. 3556de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!GEPs.empty()) { 3557de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size() 3558de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar << " underlying objects.\n"); 3559de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Changed |= vectorizeGEPIndices(BB, R); 3560de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3561de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3562de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3563de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Changed) { 3564de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar R.optimizeGatherSequence(); 3565de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n"); 3566de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(verifyFunction(F)); 3567de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3568de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return Changed; 3569de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 3570e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 357136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// \brief Check that the Values in the slice in VL array are still existent in 3572dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// the WeakVH array. 3573dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// Vectorization of part of the VL array may cause later values in the VL array 3574dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling/// to become invalid. We track when this has happened in the WeakVH array. 35754c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainarstatic bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH, 35764c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned SliceBegin, unsigned SliceSize) { 35774c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar VL = VL.slice(SliceBegin, SliceSize); 35784c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar VH = VH.slice(SliceBegin, SliceSize); 35794c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar return !std::equal(VL.begin(), VL.end(), VH.begin()); 3580dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling} 3581dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 3582de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, 3583de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int CostThreshold, BoUpSLP &R, 3584de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned VecRegSize) { 3585369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem unsigned ChainLen = Chain.size(); 3586369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen 3587369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem << "\n"); 3588de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Sz = R.getVectorElementSize(Chain[0]); 3589f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned VF = VecRegSize / Sz; 3590369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3591369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (!isPowerOf2_32(Sz) || VF < 2) 3592369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return false; 3593369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 359436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Keep track of values that were deleted by vectorizing in the loop below. 3595dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end()); 3596dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 3597369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool Changed = false; 3598369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Look for profitable vectorizable trees at all offsets, starting at zero. 3599369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem for (unsigned i = 0, e = ChainLen; i < e; ++i) { 3600369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (i + VF > e) 3601369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem break; 3602dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 3603dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling // Check that a previous iteration of this loop did not delete the Value. 3604dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling if (hasValueBeenRAUWed(Chain, TrackValues, i, VF)) 3605dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling continue; 3606dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 3607369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i 3608369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem << "\n"); 3609369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem ArrayRef<Value *> Operands = Chain.slice(i, VF); 3610369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3611369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem R.buildTree(Operands); 3612de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar R.computeMinimumValueSizes(); 3613369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3614369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem int Cost = R.getTreeCost(); 3615369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3616369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); 3617369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Cost < CostThreshold) { 3618369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); 3619369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem R.vectorizeTree(); 3620369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3621369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Move to the next bundle. 3622369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem i += VF - 1; 3623369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Changed = true; 3624369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 3625369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 3626369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 36278e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling return Changed; 3628369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 3629369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3630de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores, 3631de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int costThreshold, BoUpSLP &R) { 36324c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar SetVector<StoreInst *> Heads, Tails; 36334c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain; 3634369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3635369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // We may run into multiple chains that merge into a single chain. We mark the 3636369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // stores that we vectorized so that we don't visit the same store twice. 3637369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BoUpSLP::ValueSet VectorizedStores; 3638369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem bool Changed = false; 3639369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3640369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Do a quadratic search on all of the given stores and find 36416611eaa32f7941dd50a3ffe608f3f4a7665dbe91Nadav Rotem // all of the pairs of stores that follow each other. 3642f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SmallVector<unsigned, 16> IndexQueue; 364321508bf853354343266dbe6d830ff30bed006a68Nadav Rotem for (unsigned i = 0, e = Stores.size(); i < e; ++i) { 3644f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar IndexQueue.clear(); 3645f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // If a store has multiple consecutive store candidates, search Stores 3646f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // array according to the sequence: from i+1 to e, then from i-1 to 0. 3647f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // This is because usually pairing with immediate succeeding or preceding 3648f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // candidate create the best chance to find slp vectorization opportunity. 3649f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned j = 0; 3650f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (j = i + 1; j < e; ++j) 3651f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar IndexQueue.push_back(j); 3652f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (j = i; j > 0; --j) 3653f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar IndexQueue.push_back(j - 1); 3654f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 3655f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (auto &k : IndexQueue) { 3656de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isConsecutiveAccess(Stores[i], Stores[k], *DL, *SE)) { 3657f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Tails.insert(Stores[k]); 3658369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Heads.insert(Stores[i]); 3659f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar ConsecutiveChain[Stores[i]] = Stores[k]; 3660f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar break; 3661369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 3662369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 366321508bf853354343266dbe6d830ff30bed006a68Nadav Rotem } 3664369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3665369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // For stores that start but don't end a link in the chain: 36664c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end(); 3667369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem it != e; ++it) { 3668369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (Tails.count(*it)) 3669369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem continue; 3670369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3671369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // We found a store instr that starts a chain. Now follow the chain and try 3672369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // to vectorize it. 3673369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem BoUpSLP::ValueList Operands; 36744c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar StoreInst *I = *it; 3675369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Collect the chain into a list. 3676369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem while (Tails.count(I) || Heads.count(I)) { 3677369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem if (VectorizedStores.count(I)) 3678369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem break; 3679369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem Operands.push_back(I); 3680369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem // Move to the next value in the chain. 3681369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem I = ConsecutiveChain[I]; 3682369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 3683369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3684f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // FIXME: Is division-by-2 the correct step? Should we assert that the 3685f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // register size is a power-of-2? 3686de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned Size = R.getMaxVecRegSize(); Size >= R.getMinVecRegSize(); Size /= 2) { 3687f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (vectorizeStoreChain(Operands, costThreshold, R, Size)) { 3688f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Mark the vectorized stores so that we don't vectorize them again. 3689f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar VectorizedStores.insert(Operands.begin(), Operands.end()); 3690f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Changed = true; 3691f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar break; 3692f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 3693f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 3694369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem } 3695369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3696369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem return Changed; 3697369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem} 3698369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3699de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) { 3700369cc938d261de3295eb70d0738f54ef1a82806cNadav Rotem 3701de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Initialize the collections. We will make a single pass over the block. 3702de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Stores.clear(); 3703de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar GEPs.clear(); 3704fc1604ec7274e9b724ff9b2512c288a691167427Arnold Schwaighofer 3705de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Visit the store and getelementptr instructions in BB and organize them in 3706de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Stores and GEPs according to the underlying objects of their pointer 3707de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // operands. 3708de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Instruction &I : *BB) { 3709e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3710de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Ignore store instructions that are volatile or have a pointer operand 3711de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // that doesn't point to a scalar type. 3712de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (auto *SI = dyn_cast<StoreInst>(&I)) { 3713de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!SI->isSimple()) 3714de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 3715de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!isValidElementType(SI->getValueOperand()->getType())) 3716de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 3717de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Stores[GetUnderlyingObject(SI->getPointerOperand(), *DL)].push_back(SI); 3718de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3719e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3720de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Ignore getelementptr instructions that have more than one index, a 3721de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // constant index, or a pointer operand that doesn't point to a scalar 3722de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // type. 3723de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { 3724de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto Idx = GEP->idx_begin()->get(); 3725de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (GEP->getNumIndices() > 1 || isa<Constant>(Idx)) 3726de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 3727de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!isValidElementType(Idx->getType())) 3728de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 3729de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (GEP->getType()->isVectorTy()) 3730de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 3731de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar GEPs[GetUnderlyingObject(GEP->getPointerOperand(), *DL)].push_back(GEP); 3732de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 37338383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem } 3734e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem} 3735e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 3736de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { 37370b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem if (!A || !B) 37380b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem return false; 37396fe5cc49d88c9dd48a1eefe4c1bdba1567b8eef2Benjamin Kramer Value *VL[] = { A, B }; 374037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return tryToVectorizeList(VL, R, None, true); 3741444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem} 3742444e33e8987110c6669bc2d9b8efd768bb17faa1Nadav Rotem 3743de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, 3744de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ArrayRef<Value *> BuildVector, 3745de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool allowReorder) { 37465cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem if (VL.size() < 2) 37475cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem return false; 37485cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem 37490b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n"); 37504f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem 37515cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem // Check that all of the parts are scalar instructions of the same type. 37525cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem Instruction *I0 = dyn_cast<Instruction>(VL[0]); 37530b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem if (!I0) 375489008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang return false; 37555cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem 37565cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem unsigned Opcode0 = I0->getOpcode(); 37578e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 3758f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // FIXME: Register size should be a parameter to this function, so we can 3759f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // try different vectorization factors. 3760de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Sz = R.getVectorElementSize(I0); 3761de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned VF = R.getMinVecRegSize() / Sz; 37625cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem 3763f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (Value *V : VL) { 3764f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Type *Ty = V->getType(); 3765ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!isValidElementType(Ty)) 376689008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang return false; 3767f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Instruction *Inst = dyn_cast<Instruction>(V); 37685cb84896bc9fa839335bcdb569d26faa2fe5b583Nadav Rotem if (!Inst || Inst->getOpcode() != Opcode0) 376989008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang return false; 37704f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem } 37714f38e16b89895b795ece58742195d0d95cbd4187Nadav Rotem 377289008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang bool Changed = false; 37738e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 3774dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Keep track of values that were deleted by vectorizing in the loop below. 3775dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end()); 3776dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 377789008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang for (unsigned i = 0, e = VL.size(); i < e; ++i) { 377889008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang unsigned OpsWidth = 0; 37798e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 37808e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling if (i + VF > e) 378189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang OpsWidth = e - i; 378289008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang else 378389008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang OpsWidth = VF; 378489008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang 378589008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) 378689008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang break; 3787d6f0c34273dd3536102f2d643403252468dfc4a3Nadav Rotem 3788dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling // Check that a previous iteration of this loop did not delete the Value. 3789dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth)) 3790dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling continue; 3791dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731dBill Wendling 37928e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " 37938e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling << "\n"); 379489008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang ArrayRef<Value *> Ops = VL.slice(i, OpsWidth); 37958e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 3796dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ArrayRef<Value *> BuildVectorSlice; 3797dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!BuildVector.empty()) 3798dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BuildVectorSlice = BuildVector.slice(i, OpsWidth); 3799dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 3800dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines R.buildTree(Ops, BuildVectorSlice); 380137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // TODO: check if we can allow reordering also for other cases than 380237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // tryToVectorizePair() 380337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (allowReorder && R.shouldReorder()) { 380437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(Ops.size() == 2); 380537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(BuildVectorSlice.empty()); 380637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Value *ReorderedOps[] = { Ops[1], Ops[0] }; 380737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines R.buildTree(ReorderedOps, None); 380837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 3809de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar R.computeMinimumValueSizes(); 381089008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang int Cost = R.getTreeCost(); 38118e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 381289008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang if (Cost < -SLPCostThreshold) { 381336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); 3814dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Value *VectorizedRoot = R.vectorizeTree(); 3815dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 3816dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Reconstruct the build vector by extracting the vectorized root. This 3817dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // way we handle the case where some elements of the vector are undefined. 3818dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) 3819dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!BuildVectorSlice.empty()) { 3820dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // The insert point is the last build vector instruction. The vectorized 3821dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // root will precede it. This guarantees that we get an instruction. The 3822dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // vectorized tree could have been constant folded. 3823dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back()); 3824dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned VecIdx = 0; 3825dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (auto &V : BuildVectorSlice) { 3826de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar IRBuilder<NoFolder> Builder(InsertAfter->getParent(), 3827de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ++BasicBlock::iterator(InsertAfter)); 3828de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Instruction *I = cast<Instruction>(V); 3829de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I)); 3830dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement( 3831dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines VectorizedRoot, Builder.getInt32(VecIdx++))); 3832de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar I->setOperand(1, Extract); 3833de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar I->removeFromParent(); 3834de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar I->insertAfter(Extract); 3835de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar InsertAfter = I; 3836dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 3837dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 383889008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang // Move to the next bundle. 383989008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang i += VF - 1; 384089008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang Changed = true; 384189008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang } 384289008539a322b9ce1d66837b3342d0be312bcbbaYi Jiang } 38438e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling 38448e5b91849a39173b1ce1c15e0e279b94562204b5Bill Wendling return Changed; 3845e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem} 38468383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 3847de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { 38480b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem if (!V) 38490b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem return false; 385053a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 3851e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Try to vectorize V. 3852e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R)) 3853f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem return true; 3854f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem 3855e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0)); 3856e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1)); 3857e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Try to skip B. 3858e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (B && B->hasOneUse()) { 3859e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0)); 3860e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1)); 3861e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(A, B0, R)) { 3862ab105ae95fc473c19d9f0b019fc7c7a16d17b1a5Nadav Rotem return true; 3863f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem } 3864e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(A, B1, R)) { 3865e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return true; 3866f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem } 3867f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem } 3868f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem 38697fac0ef71cfaeafd91b9520b553d00d91f83a442Nadav Rotem // Try to skip A. 3870e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (A && A->hasOneUse()) { 3871e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0)); 3872e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1)); 3873e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(A0, B, R)) { 3874e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return true; 3875e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 3876e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(A1, B, R)) { 3877e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return true; 3878f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem } 3879f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem } 3880e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return 0; 3881e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem} 3882f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem 3883a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \brief Generate a shuffle mask to be used in a reduction tree. 3884a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// 3885a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param VecLen The length of the vector to be reduced. 3886a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param NumEltsToRdx The number of elements that should be reduced in the 3887a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// vector. 3888a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param IsPairwise Whether the reduction is a pairwise or splitting 3889de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// reduction. A pairwise reduction will generate a mask of 3890a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// <0,2,...> or <1,3,..> while a splitting reduction will generate 3891a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// <2,3, undef,undef> for a vector of 4 and NumElts = 2. 3892a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \param IsLeft True will generate a mask of even elements, odd otherwise. 3893a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferstatic Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, 3894a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer bool IsPairwise, bool IsLeft, 3895a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer IRBuilder<> &Builder) { 3896a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask"); 3897a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3898a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer SmallVector<Constant *, 32> ShuffleMask( 3899a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer VecLen, UndefValue::get(Builder.getInt32Ty())); 3900a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3901a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (IsPairwise) 3902a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Build a mask of 0, 2, ... (left) or 1, 3, ... (right). 3903a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer for (unsigned i = 0; i != NumEltsToRdx; ++i) 3904a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft); 3905a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer else 3906a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Move the upper half of the vector to the lower half. 3907a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer for (unsigned i = 0; i != NumEltsToRdx; ++i) 3908a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i); 3909a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3910a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return ConstantVector::get(ShuffleMask); 3911a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer} 3912a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3913a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3914a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// Model horizontal reductions. 3915a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// 3916a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// A horizontal reduction is a tree of reduction operations (currently add and 3917a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// fadd) that has operations that can be put into a vector as its leaf. 3918a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// For example, this tree: 3919a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// 3920a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// mul mul mul mul 3921a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \ / \ / 3922a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// + + 3923a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \ / 3924a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// + 3925a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// This tree has "mul" as its reduced values and "+" as its reduction 3926a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// operations. A reduction might be feeding into a store or a binary operation 3927a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// feeding a phi. 3928a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// ... 3929a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \ / 3930a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// + 393138bf2d62b6be4b496e8f6d176578699b9c6e08bbArnold Schwaighofer/// | 3932a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// phi += 3933a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// 3934a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// Or: 3935a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// ... 3936a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// \ / 3937a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// + 393838bf2d62b6be4b496e8f6d176578699b9c6e08bbArnold Schwaighofer/// | 3939a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// *p = 3940a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer/// 3941a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferclass HorizontalReduction { 3942dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVector<Value *, 16> ReductionOps; 3943a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer SmallVector<Value *, 32> ReducedVals; 3944a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3945a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer BinaryOperator *ReductionRoot; 3946a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer PHINode *ReductionPHI; 3947a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3948a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// The opcode of the reduction. 3949a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer unsigned ReductionOpcode; 3950a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// The opcode of the values we perform a reduction on. 3951a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer unsigned ReducedValueOpcode; 3952a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// Should we model this reduction as a pairwise reduction tree or a tree that 3953a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// splits the vector in halves and adds those halves. 3954a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer bool IsPairwiseReduction; 3955a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3956a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighoferpublic: 3957f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar /// The width of one full horizontal reduction operation. 3958f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned ReduxWidth; 3959f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 3960de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// Minimal width of available vector registers. It's used to determine 3961de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar /// ReduxWidth. 3962de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned MinVecRegSize; 3963de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3964de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar HorizontalReduction(unsigned MinVecRegSize) 3965de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0), 3966de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0), 3967de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MinVecRegSize(MinVecRegSize) {} 3968a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3969a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// \brief Try to find a reduction tree. 39704c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) { 3971a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer assert((!Phi || 3972a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) && 3973a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer "Thi phi needs to use the binary operator"); 3974a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3975a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // We could have a initial reductions that is not an add. 3976a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // r *= v1 + v2 + v3 + v4 3977a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // In such a case start looking for a tree rooted in the first '+'. 3978a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (Phi) { 3979a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (B->getOperand(0) == Phi) { 3980dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Phi = nullptr; 3981a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer B = dyn_cast<BinaryOperator>(B->getOperand(1)); 3982a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } else if (B->getOperand(1) == Phi) { 3983dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Phi = nullptr; 3984a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer B = dyn_cast<BinaryOperator>(B->getOperand(0)); 3985a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3986a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 3987a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3988a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (!B) 3989a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3990a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 3991a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Type *Ty = B->getType(); 3992ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!isValidElementType(Ty)) 3993a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 3994a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 39954c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const DataLayout &DL = B->getModule()->getDataLayout(); 3996a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionOpcode = B->getOpcode(); 3997a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReducedValueOpcode = 0; 3998f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // FIXME: Register size should be a parameter to this function, so we can 3999f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // try different vectorization factors. 40004c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty); 4001a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionRoot = B; 4002a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionPHI = Phi; 4003a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4004a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (ReduxWidth < 4) 4005a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 4006a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4007a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // We currently only support adds. 4008a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (ReductionOpcode != Instruction::Add && 4009a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionOpcode != Instruction::FAdd) 4010a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 4011a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4012a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Post order traverse the reduction tree starting at B. We only handle true 4013f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // trees containing only binary operators or selects. 4014f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SmallVector<std::pair<Instruction *, unsigned>, 32> Stack; 4015a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Stack.push_back(std::make_pair(B, 0)); 4016a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer while (!Stack.empty()) { 4017f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Instruction *TreeN = Stack.back().first; 4018a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer unsigned EdgeToVist = Stack.back().second++; 4019a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode; 4020a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4021a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Only handle trees in the current basic block. 4022a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (TreeN->getParent() != B->getParent()) 4023a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 4024a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4025a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Each tree node needs to have one user except for the ultimate 4026a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // reduction. 4027a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (!TreeN->hasOneUse() && TreeN != B) 4028a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 4029a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4030a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Postorder vist. 4031a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (EdgeToVist == 2 || IsReducedValue) { 4032a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (IsReducedValue) { 4033a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Make sure that the opcodes of the operations that we are going to 4034a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // reduce match. 4035a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (!ReducedValueOpcode) 4036a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReducedValueOpcode = TreeN->getOpcode(); 4037a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer else if (ReducedValueOpcode != TreeN->getOpcode()) 4038a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 4039a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReducedVals.push_back(TreeN); 4040a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } else { 4041a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // We need to be able to reassociate the adds. 4042a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (!TreeN->isAssociative()) 4043a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 4044dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ReductionOps.push_back(TreeN); 4045a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4046a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Retract. 4047a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Stack.pop_back(); 4048a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer continue; 4049a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4050a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4051a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Visit left or right. 4052a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *NextV = TreeN->getOperand(EdgeToVist); 4053f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // We currently only allow BinaryOperator's and SelectInst's as reduction 4054f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // values in our tree. 4055f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (isa<BinaryOperator>(NextV) || isa<SelectInst>(NextV)) 4056f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Stack.push_back(std::make_pair(cast<Instruction>(NextV), 0)); 4057a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer else if (NextV != Phi) 4058a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 4059a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4060a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return true; 4061a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4062a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4063a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// \brief Attempt to vectorize the tree found by 4064a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// matchAssociativeReduction. 4065a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) { 4066a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (ReducedVals.empty()) 4067a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 4068a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4069a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer unsigned NumReducedVals = ReducedVals.size(); 4070a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (NumReducedVals < ReduxWidth) 4071a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return false; 4072a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4073dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Value *VectorizedTree = nullptr; 4074a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer IRBuilder<> Builder(ReductionRoot); 4075a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer FastMathFlags Unsafe; 4076a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Unsafe.setUnsafeAlgebra(); 4077de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Builder.setFastMathFlags(Unsafe); 4078a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer unsigned i = 0; 4079a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4080a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { 408137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines V.buildTree(makeArrayRef(&ReducedVals[i], ReduxWidth), ReductionOps); 4082de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar V.computeMinimumValueSizes(); 4083a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4084a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Estimate cost. 4085a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]); 4086a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (Cost >= -SLPCostThreshold) 4087a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer break; 4088a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4089a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost 4090a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer << ". (HorRdx)\n"); 4091a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4092a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Vectorize a tree. 4093a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc(); 4094a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *VectorizedRoot = V.vectorizeTree(); 4095a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4096a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Emit a reduction. 4097a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder); 4098a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (VectorizedTree) { 4099a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Builder.SetCurrentDebugLocation(Loc); 4100a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree, 4101a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReducedSubTree, "bin.rdx"); 4102a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } else 4103a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer VectorizedTree = ReducedSubTree; 4104a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4105a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4106a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (VectorizedTree) { 4107a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Finish the reduction. 4108a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer for (; i < NumReducedVals; ++i) { 4109a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Builder.SetCurrentDebugLocation( 4110a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer cast<Instruction>(ReducedVals[i])->getDebugLoc()); 4111a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree, 4112a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReducedVals[i]); 4113a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4114a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Update users. 4115a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (ReductionPHI) { 4116dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(ReductionRoot && "Need a reduction operation"); 4117a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionRoot->setOperand(0, VectorizedTree); 4118a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionRoot->setOperand(1, ReductionPHI); 4119a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } else 4120a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReductionRoot->replaceAllUsesWith(VectorizedTree); 4121a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4122dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return VectorizedTree != nullptr; 4123a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4124a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4125f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned numReductionValues() const { 4126f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return ReducedVals.size(); 4127f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 4128a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4129f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarprivate: 4130f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar /// \brief Calculate the cost of a reduction. 4131a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) { 4132a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Type *ScalarTy = FirstReducedVal->getType(); 4133a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Type *VecTy = VectorType::get(ScalarTy, ReduxWidth); 4134a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4135a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int PairwiseRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, true); 4136a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int SplittingRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, false); 4137a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4138a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost; 4139a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost; 4140a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4141a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer int ScalarReduxCost = 4142a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy); 4143a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4144a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost 4145a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer << " for reduction that starts with " << *FirstReducedVal 4146a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer << " (It is a " 4147a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer << (IsPairwiseReduction ? "pairwise" : "splitting") 4148a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer << " reduction)\n"); 4149a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4150a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return VecReduxCost - ScalarReduxCost; 4151a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4152a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4153a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L, 4154a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *R, const Twine &Name = "") { 4155a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (Opcode == Instruction::FAdd) 4156a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return Builder.CreateFAdd(L, R, Name); 4157a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name); 4158a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4159a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4160a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer /// \brief Emit a horizontal reduction of the vectorized value. 4161a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) { 4162a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer assert(VectorizedValue && "Need to have a vectorized tree node"); 4163a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer assert(isPowerOf2_32(ReduxWidth) && 4164a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer "We only handle power-of-two reductions for now"); 4165a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4166ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Value *TmpVec = VectorizedValue; 4167a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { 4168a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer if (IsPairwiseReduction) { 4169a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *LeftMask = 4170a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer createRdxShuffleMask(ReduxWidth, i, true, true, Builder); 4171a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *RightMask = 4172a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer createRdxShuffleMask(ReduxWidth, i, true, false, Builder); 4173a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4174a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *LeftShuf = Builder.CreateShuffleVector( 4175a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l"); 4176a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *RightShuf = Builder.CreateShuffleVector( 4177a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer TmpVec, UndefValue::get(TmpVec->getType()), (RightMask), 4178a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer "rdx.shuf.r"); 4179a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf, 4180a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer "bin.rdx"); 4181a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } else { 4182a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *UpperHalf = 4183a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer createRdxShuffleMask(ReduxWidth, i, false, false, Builder); 4184a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *Shuf = Builder.CreateShuffleVector( 4185a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf"); 4186a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx"); 4187a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4188a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4189a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4190a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // The result is in the first element of the vector. 4191a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); 4192a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4193a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer}; 4194a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 41951b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// \brief Recognize construction of vectors like 41961b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// %ra = insertelement <4 x float> undef, float %s0, i32 0 41971b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// %rb = insertelement <4 x float> %ra, float %s1, i32 1 41981b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// %rc = insertelement <4 x float> %rb, float %s2, i32 2 41991b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// %rd = insertelement <4 x float> %rc, float %s3, i32 3 42001b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// 42011b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// Returns true if it matches 42021b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault/// 4203dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesstatic bool findBuildVector(InsertElementInst *FirstInsertElem, 4204dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVectorImpl<Value *> &BuildVector, 4205dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVectorImpl<Value *> &BuildVectorOpds) { 4206dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!isa<UndefValue>(FirstInsertElem->getOperand(0))) 42071b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault return false; 42081b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 4209dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines InsertElementInst *IE = FirstInsertElem; 42101b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault while (true) { 4211dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BuildVector.push_back(IE); 4212dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BuildVectorOpds.push_back(IE->getOperand(1)); 42131b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 42141b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault if (IE->use_empty()) 42151b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault return false; 42161b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 421736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->user_back()); 42181b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault if (!NextUse) 42191b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault return true; 42201b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 42211b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault // If this isn't the final use, make sure the next insertelement is the only 42221b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault // use. It's OK if the final constructed vector is used multiple times 42231b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault if (!IE->hasOneUse()) 42241b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault return false; 42251b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 42261b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault IE = NextUse; 42271b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault } 42281b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 42291b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault return false; 42301b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault} 42311b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 4232de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// \brief Like findBuildVector, but looks backwards for construction of aggregate. 4233de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// 4234de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// \return true if it matches. 4235de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic bool findBuildAggregate(InsertValueInst *IV, 4236de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVectorImpl<Value *> &BuildVector, 4237de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVectorImpl<Value *> &BuildVectorOpds) { 4238de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!IV->hasOneUse()) 4239de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 4240de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Value *V = IV->getAggregateOperand(); 4241de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!isa<UndefValue>(V)) { 4242de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar InsertValueInst *I = dyn_cast<InsertValueInst>(V); 4243de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!I || !findBuildAggregate(I, BuildVector, BuildVectorOpds)) 4244de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 4245de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 4246de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildVector.push_back(IV); 4247de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildVectorOpds.push_back(IV->getInsertedValueOperand()); 4248de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return true; 4249de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 4250de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 425124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighoferstatic bool PhiTypeSorterFunc(Value *V, Value *V2) { 425224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer return V->getType() < V2->getType(); 425324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer} 425424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer 4255f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \brief Try and get a reduction value from a phi node. 4256f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// 4257f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Given a phi node \p P in a block \p ParentBB, consider possible reductions 4258f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// if they come from either \p ParentBB or a containing loop latch. 4259f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// 4260f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \returns A candidate reduction value if possible, or \code nullptr \endcode 4261f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// if not possible. 4262f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic Value *getReductionValue(const DominatorTree *DT, PHINode *P, 4263f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BasicBlock *ParentBB, LoopInfo *LI) { 4264f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // There are situations where the reduction value is not dominated by the 4265f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // reduction phi. Vectorizing such cases has been reported to cause 4266f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // miscompiles. See PR25787. 4267f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar auto DominatedReduxValue = [&](Value *R) { 4268f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return ( 4269f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar dyn_cast<Instruction>(R) && 4270f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar DT->dominates(P->getParent(), dyn_cast<Instruction>(R)->getParent())); 4271f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar }; 4272f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4273f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Value *Rdx = nullptr; 4274f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4275f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Return the incoming value if it comes from the same BB as the phi node. 4276f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (P->getIncomingBlock(0) == ParentBB) { 4277f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Rdx = P->getIncomingValue(0); 4278f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } else if (P->getIncomingBlock(1) == ParentBB) { 4279f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Rdx = P->getIncomingValue(1); 4280f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 4281f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4282f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Rdx && DominatedReduxValue(Rdx)) 4283f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return Rdx; 4284f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4285f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Otherwise, check whether we have a loop latch to look at. 4286f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Loop *BBL = LI->getLoopFor(ParentBB); 4287f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!BBL) 4288f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return nullptr; 4289f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BasicBlock *BBLatch = BBL->getLoopLatch(); 4290f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!BBLatch) 4291f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return nullptr; 4292f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4293f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // There is a loop latch, return the incoming value if it comes from 4294f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // that. This reduction pattern occassionaly turns up. 4295f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (P->getIncomingBlock(0) == BBLatch) { 4296f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Rdx = P->getIncomingValue(0); 4297f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } else if (P->getIncomingBlock(1) == BBLatch) { 4298f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Rdx = P->getIncomingValue(1); 4299f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 4300f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4301f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Rdx && DominatedReduxValue(Rdx)) 4302f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return Rdx; 4303f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4304f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return nullptr; 4305f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 4306f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4307f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \brief Attempt to reduce a horizontal reduction. 4308f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// If it is legal to match a horizontal reduction feeding 4309f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// the phi node P with reduction operators BI, then check if it 4310f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// can be done. 4311f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \returns true if a horizontal reduction was matched and reduced. 4312f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// \returns false if a horizontal reduction was not matched. 4313f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI, 4314de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BoUpSLP &R, TargetTransformInfo *TTI, 4315de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned MinRegSize) { 4316f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!ShouldVectorizeHor) 4317f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 4318f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4319de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar HorizontalReduction HorRdx(MinRegSize); 4320f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!HorRdx.matchAssociativeReduction(P, BI)) 4321f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 4322f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4323f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // If there is a sufficient number of reduction values, reduce 4324f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // to a nearby power-of-2. Can safely generate oversized 4325f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // vectors and rely on the backend to split them to legal sizes. 4326f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar HorRdx.ReduxWidth = 4327f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues())); 4328f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4329f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return HorRdx.tryToReduce(R, TTI); 4330f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 4331f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4332de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { 4333e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem bool Changed = false; 4334931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem SmallVector<Value *, 4> Incoming; 433524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer SmallSet<Value *, 16> VisitedInstrs; 433624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer 433724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer bool HaveVectorizedPhiNodes = true; 433824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer while (HaveVectorizedPhiNodes) { 433924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer HaveVectorizedPhiNodes = false; 434024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer 434124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Collect the incoming values from the PHIs. 434224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer Incoming.clear(); 4343de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (Instruction &I : *BB) { 4344de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar PHINode *P = dyn_cast<PHINode>(&I); 434524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer if (!P) 434624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer break; 434716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 434824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer if (!VisitedInstrs.count(P)) 434924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer Incoming.push_back(P); 435024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer } 4351931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem 435224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Sort by type. 435324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc); 4354931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem 435524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Try to vectorize elements base on their type. 435624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(), 435724732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer E = Incoming.end(); 435824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer IncIt != E;) { 435924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer 436024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Look for the next elements with the same type. 436124732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer SmallVector<Value *, 4>::iterator SameTypeIt = IncIt; 436224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer while (SameTypeIt != E && 436324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer (*SameTypeIt)->getType() == (*IncIt)->getType()) { 436424732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer VisitedInstrs.insert(*SameTypeIt); 436524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer ++SameTypeIt; 436624732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer } 436716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 436824732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Try to vectorize them. 436924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer unsigned NumElts = (SameTypeIt - IncIt); 437024732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n"); 437137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) { 437224732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer // Success start over because instructions might have been changed. 437324732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer HaveVectorizedPhiNodes = true; 437416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer Changed = true; 437524732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer break; 437616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer } 437716a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 437836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Start over at the next instruction of a different type (or the end). 437924732c3363a9a442c14cf236c3de1086cdee6000Arnold Schwaighofer IncIt = SameTypeIt; 4380931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem } 4381931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem } 4382931b861e3dcf966fde46d57683013e74736eb448Nadav Rotem 438316a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer VisitedInstrs.clear(); 438416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 438516a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) { 438616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer // We may go through BB multiple times so skip the one we have checked. 4387f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!VisitedInstrs.insert(&*it).second) 438816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer continue; 438916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 439016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer if (isa<DbgInfoIntrinsic>(it)) 43910b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem continue; 4392e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem 4393e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Try to vectorize reductions that use PHINodes. 439416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer if (PHINode *P = dyn_cast<PHINode>(it)) { 4395e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Check that the PHI is a reduction PHI. 43960b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem if (P->getNumIncomingValues() != 2) 43970b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem return Changed; 4398f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4399f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Value *Rdx = getReductionValue(DT, P, BB, LI); 4400f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 4401e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Check if this is a Binary Operator. 4402e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx); 4403e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (!BI) 44048383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem continue; 4405196ee11f85ce0148d2c2e33fbe1f1171ac5a8828Nadav Rotem 4406a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer // Try to match and vectorize a horizontal reduction. 4407de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (canMatchHorizontalReduction(P, BI, R, TTI, R.getMinVecRegSize())) { 4408a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Changed = true; 4409a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer it = BB->begin(); 4410a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer e = BB->end(); 4411a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer continue; 4412a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4413a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4414a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer Value *Inst = BI->getOperand(0); 44150b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem if (Inst == P) 44160b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem Inst = BI->getOperand(1); 441753a0552b06cb8288004f7698f6e4640fe2a74f61Nadav Rotem 441816a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) { 441916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer // We would like to start over since some instructions are deleted 442016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer // and the iterator may become invalid value. 442116a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer Changed = true; 442216a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer it = BB->begin(); 442316a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer e = BB->end(); 4424a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer continue; 442516a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer } 4426a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 4427e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem continue; 4428e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 4429196ee11f85ce0148d2c2e33fbe1f1171ac5a8828Nadav Rotem 44309660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer if (ShouldStartVectorizeHorAtStore) 44319660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer if (StoreInst *SI = dyn_cast<StoreInst>(it)) 44329660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer if (BinaryOperator *BinOp = 44339660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer dyn_cast<BinaryOperator>(SI->getValueOperand())) { 4434de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI, 4435de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar R.getMinVecRegSize()) || 4436f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar tryToVectorize(BinOp, R)) { 44379660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer Changed = true; 44389660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer it = BB->begin(); 44399660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer e = BB->end(); 44409660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer continue; 44419660ebb398cc3e9202a24087ad46290c3de29c7cArnold Schwaighofer } 4442a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer } 4443a9baf1ecfd343e2d8d8fa277c8b093a1869726bbArnold Schwaighofer 444437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Try to vectorize horizontal reductions feeding into a return. 444537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) 444637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (RI->getNumOperands() != 0) 444737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (BinaryOperator *BinOp = 444837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines dyn_cast<BinaryOperator>(RI->getOperand(0))) { 444937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "SLP: Found a return to vectorize.\n"); 445037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (tryToVectorizePair(BinOp->getOperand(0), 445137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BinOp->getOperand(1), R)) { 445237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Changed = true; 445337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines it = BB->begin(); 445437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines e = BB->end(); 445537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 445637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 445737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 445837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 4459e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Try to vectorize trees that start at compare instructions. 446016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer if (CmpInst *CI = dyn_cast<CmpInst>(it)) { 4461e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { 446216a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer Changed = true; 446316a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer // We would like to start over since some instructions are deleted 446416a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer // and the iterator may become invalid value. 446516a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer it = BB->begin(); 446616a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer e = BB->end(); 4467e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem continue; 4468e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem } 446916a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer 447016a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer for (int i = 0; i < 2; ++i) { 447137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) { 447237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) { 447337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Changed = true; 447437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // We would like to start over since some instructions are deleted 447537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // and the iterator may become invalid value. 447637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines it = BB->begin(); 447737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines e = BB->end(); 4478ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 447937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 448037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 448116a2253e4011d27a9426f81f55501fd5dfb863bdArnold Schwaighofer } 4482e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem continue; 44838383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem } 44841b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 44851b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault // Try to vectorize trees that start at insertelement instructions. 4486dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) { 4487dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVector<Value *, 16> BuildVector; 4488dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SmallVector<Value *, 16> BuildVectorOpds; 4489dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds)) 44901b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault continue; 44911b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 4492dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Vectorize starting with the build vector operands ignoring the 4493dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // BuildVector instructions for the purpose of scheduling and user 4494dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // extraction. 4495dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) { 44961b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault Changed = true; 44971b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault it = BB->begin(); 44981b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault e = BB->end(); 44991b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault } 45001b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault 45011b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault continue; 45021b00d910058c31abb7cc5333b42cd380a3c8e128Matt Arsenault } 4503de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4504de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Try to vectorize trees that start at insertvalue instructions feeding into 4505de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // a store. 4506de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (StoreInst *SI = dyn_cast<StoreInst>(it)) { 4507de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (InsertValueInst *LastInsertValue = dyn_cast<InsertValueInst>(SI->getValueOperand())) { 4508de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const DataLayout &DL = BB->getModule()->getDataLayout(); 4509de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (R.canMapToVector(SI->getValueOperand()->getType(), DL)) { 4510de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVector<Value *, 16> BuildVector; 4511de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVector<Value *, 16> BuildVectorOpds; 4512de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!findBuildAggregate(LastInsertValue, BuildVector, BuildVectorOpds)) 4513de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 4514de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4515de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "SLP: store of array mappable to vector: " << *SI << "\n"); 4516de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (tryToVectorizeList(BuildVectorOpds, R, BuildVector, false)) { 4517de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Changed = true; 4518de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar it = BB->begin(); 4519de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar e = BB->end(); 4520de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 4521de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 4522de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 4523de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 4524de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 45258383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem } 45268383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 4527e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return Changed; 4528e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem} 45298383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 4530de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { 4531de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto Changed = false; 4532de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto &Entry : GEPs) { 4533de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4534de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If the getelementptr list has fewer than two elements, there's nothing 4535de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // to do. 4536de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Entry.second.size() < 2) 4537de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 4538de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4539de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DEBUG(dbgs() << "SLP: Analyzing a getelementptr list of length " 4540de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar << Entry.second.size() << ".\n"); 4541de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4542de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We process the getelementptr list in chunks of 16 (like we do for 4543de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // stores) to minimize compile-time. 4544de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += 16) { 4545de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto Len = std::min<unsigned>(BE - BI, 16); 4546de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto GEPList = makeArrayRef(&Entry.second[BI], Len); 4547de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4548de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Initialize a set a candidate getelementptrs. Note that we use a 4549de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // SetVector here to preserve program order. If the index computations 4550de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // are vectorizable and begin with loads, we want to minimize the chance 4551de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // of having to reorder them later. 4552de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SetVector<Value *> Candidates(GEPList.begin(), GEPList.end()); 4553de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4554de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Some of the candidates may have already been vectorized after we 4555de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // initially collected them. If so, the WeakVHs will have nullified the 4556de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // values, so remove them from the set of candidates. 4557de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Candidates.remove(nullptr); 4558de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4559de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Remove from the set of candidates all pairs of getelementptrs with 4560de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // constant differences. Such getelementptrs are likely not good 4561de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // candidates for vectorization in a bottom-up phase since one can be 4562de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // computed from the other. We also ensure all candidate getelementptr 4563de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // indices are unique. 4564de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) { 4565de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *GEPI = cast<GetElementPtrInst>(GEPList[I]); 4566de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!Candidates.count(GEPI)) 4567de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 4568de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *SCEVI = SE->getSCEV(GEPList[I]); 4569de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (int J = I + 1; J < E && Candidates.size() > 1; ++J) { 4570de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *GEPJ = cast<GetElementPtrInst>(GEPList[J]); 4571de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *SCEVJ = SE->getSCEV(GEPList[J]); 4572de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) { 4573de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Candidates.remove(GEPList[I]); 4574de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Candidates.remove(GEPList[J]); 4575de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) { 4576de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Candidates.remove(GEPList[J]); 4577de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 4578de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 4579de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 4580de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4581de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We break out of the above computation as soon as we know there are 4582de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // fewer than two candidates remaining. 4583de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Candidates.size() < 2) 4584de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 4585de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4586de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Add the single, non-constant index of each candidate to the bundle. We 4587de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // ensured the indices met these constraints when we originally collected 4588de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // the getelementptrs. 4589de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVector<Value *, 16> Bundle(Candidates.size()); 4590de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto BundleIndex = 0u; 4591de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (auto *V : Candidates) { 4592de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *GEP = cast<GetElementPtrInst>(V); 4593de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar auto *GEPIdx = GEP->idx_begin()->get(); 4594de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx)); 4595de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Bundle[BundleIndex++] = GEPIdx; 4596de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 4597de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4598de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Try and vectorize the indices. We are currently only interested in 4599de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // gather-like cases of the form: 4600de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // 4601de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // ... = g[a[0] - b[0]] + g[a[1] - b[1]] + ... 4602de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // 4603de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // where the loads of "a", the loads of "b", and the subtractions can be 4604de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // performed in parallel. It's likely that detecting this pattern in a 4605de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // bottom-up phase will be simpler and less costly than building a 4606de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // full-blown top-down phase beginning at the consecutive loads. 4607de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Changed |= tryToVectorizeList(Bundle, R); 4608de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 4609de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 4610de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return Changed; 4611de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 4612de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 4613de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) { 4614e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem bool Changed = false; 4615e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem // Attempt to sort and vectorize each of the store-groups. 4616de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (StoreListMap::iterator it = Stores.begin(), e = Stores.end(); it != e; 4617de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ++it) { 4618e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem if (it->second.size() < 2) 4619e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem continue; 4620f7eaf29cf70a545f5b717c638db83ba6e8b6b3c5Nadav Rotem 46210b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav Rotem DEBUG(dbgs() << "SLP: Analyzing a store chain of length " 462221508bf853354343266dbe6d830ff30bed006a68Nadav Rotem << it->second.size() << ".\n"); 46238383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 462421508bf853354343266dbe6d830ff30bed006a68Nadav Rotem // Process the stores in chunks of 16. 4625f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // TODO: The limit of 16 inhibits greater vectorization factors. 4626f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // For example, AVX2 supports v32i8. Increasing this limit, however, 4627f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // may cause a significant compile-time increase. 462821508bf853354343266dbe6d830ff30bed006a68Nadav Rotem for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) { 462921508bf853354343266dbe6d830ff30bed006a68Nadav Rotem unsigned Len = std::min<unsigned>(CE - CI, 16); 463037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len), 463137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines -SLPCostThreshold, R); 463221508bf853354343266dbe6d830ff30bed006a68Nadav Rotem } 46338383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem } 4634e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem return Changed; 4635e9a4411db4d3a05965630f668daf8071bf2d3513Nadav Rotem} 46368383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 46378383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemchar SLPVectorizer::ID = 0; 46388383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemstatic const char lv_name[] = "SLP Vectorizer"; 46398383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false) 4640f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarINITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 4641ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesINITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 4642ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesINITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 4643f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarINITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 46448383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_DEPENDENCY(LoopSimplify) 4645de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarINITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass) 46468383b539ff4c039108ee0c202a27b787621d96cfNadav RotemINITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) 46478383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem 46488383b539ff4c039108ee0c202a27b787621d96cfNadav Rotemnamespace llvm { 46490b827993ed98b20425c33fecaa06b7b1a32a40c4Nadav RotemPass *createSLPVectorizerPass() { return new SLPVectorizer(); } 46508383b539ff4c039108ee0c202a27b787621d96cfNadav Rotem} 4651