BBVectorize.cpp revision fc3665c87519850f629c9565535e3be447e10add
1de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===// 2de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// 3de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// The LLVM Compiler Infrastructure 4de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// 5de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// This file is distributed under the University of Illinois Open Source 6de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// License. See LICENSE.TXT for details. 7de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// 8de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//===----------------------------------------------------------------------===// 9de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// 10de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// This file implements a basic-block vectorization pass. The algorithm was 11de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// inspired by that used by the Vienna MAP Vectorizor by Franchetti and Kral, 12de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// et al. It works by looking for chains of pairable operations and then 13de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// pairing them. 14de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// 15de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//===----------------------------------------------------------------------===// 16de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 17de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define BBV_NAME "bb-vectorize" 18de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define DEBUG_TYPE BBV_NAME 19de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Constants.h" 20de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/DerivedTypes.h" 21de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Function.h" 22de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Instructions.h" 23de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/IntrinsicInst.h" 24de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Intrinsics.h" 25de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/LLVMContext.h" 26de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Pass.h" 27de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Type.h" 28de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseMap.h" 29de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseSet.h" 30de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/SmallVector.h" 31de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/Statistic.h" 32de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/STLExtras.h" 33de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/StringExtras.h" 34de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasAnalysis.h" 35de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasSetTracker.h" 36de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolution.h" 37de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolutionExpressions.h" 38de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ValueTracking.h" 39de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/CommandLine.h" 40de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/Debug.h" 41de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/raw_ostream.h" 42de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/ValueHandle.h" 43de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Target/TargetData.h" 44de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Transforms/Vectorize.h" 45de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include <algorithm> 46de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include <map> 47de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelusing namespace llvm; 48de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 49de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 50de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden, 51de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The required chain depth for vectorization")); 52de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 53de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 54de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden, 55de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The maximum search distance for instruction pairs")); 56de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 57de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 58de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden, 59de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Replicating one element to a pair breaks the chain")); 60de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 61de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 62de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelVectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden, 63de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The size of the native vector registers")); 64de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 65de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 66de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, 67de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The maximum number of pairing iterations")); 68de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 69de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 705d4e18bc39fea892f523d960213906d296d3cb38Hal FinkelMaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, 715d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel cl::desc("The maximum number of pairable instructions per group")); 725d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 735d4e18bc39fea892f523d960213906d296d3cb38Hal Finkelstatic cl::opt<unsigned> 74de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), 75de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use" 76de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " a full cycle check")); 77de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 78de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 79de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden, 80de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize integer values")); 81de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 82de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 83de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden, 84de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize floating-point values")); 85de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 86de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 87de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden, 88de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize casting (conversion) operations")); 89de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 90de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 91de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden, 92de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize floating-point math intrinsics")); 93de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 94de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 95de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, 96de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize the fused-multiply-add intrinsic")); 97de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 98de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 99fc3665c87519850f629c9565535e3be447e10addHal FinkelNoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, 100fc3665c87519850f629c9565535e3be447e10addHal Finkel cl::desc("Don't try to vectorize select instructions")); 101fc3665c87519850f629c9565535e3be447e10addHal Finkel 102fc3665c87519850f629c9565535e3be447e10addHal Finkelstatic cl::opt<bool> 103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, 104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize loads and stores")); 105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelAlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden, 108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Only generate aligned loads and stores")); 109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 111edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal FinkelNoMemOpBoost("bb-vectorize-no-mem-op-boost", 112edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel cl::init(false), cl::Hidden, 113edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel cl::desc("Don't boost the chain-depth contribution of loads and stores")); 114edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel 115edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkelstatic cl::opt<bool> 116de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelFastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden, 117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Use a fast instruction dependency analysis")); 118de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 119de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#ifndef NDEBUG 120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugInstructionExamination("bb-vectorize-debug-instruction-examination", 122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " instruction-examination process")); 125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 126de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCandidateSelection("bb-vectorize-debug-candidate-selection", 127de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 128de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " candidate-selection process")); 130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugPairSelection("bb-vectorize-debug-pair-selection", 132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 133de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 134de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " pair-selection process")); 135de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 136de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCycleCheck("bb-vectorize-debug-cycle-check", 137de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " cycle-checking process")); 140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#endif 141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSTATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize"); 143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelnamespace { 145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel struct BBVectorize : public BasicBlockPass { 146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel static char ID; // Pass identification, replacement for typeid 147bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 148940371bc65570ec0add1ede4f4d9f0a41ba25e09Hongbin Zheng const VectorizeConfig Config; 149bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 150bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize(const VectorizeConfig &C = VectorizeConfig()) 151bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng : BasicBlockPass(ID), Config(C) { 152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel initializeBBVectorizePass(*PassRegistry::getPassRegistry()); 153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 155bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize(Pass *P, const VectorizeConfig &C) 156bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng : BasicBlockPass(ID), Config(C) { 15787825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng AA = &P->getAnalysis<AliasAnalysis>(); 15887825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng SE = &P->getAnalysis<ScalarEvolution>(); 15987825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng TD = P->getAnalysisIfAvailable<TargetData>(); 16087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng } 16187825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 162de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<Value *, Value *> ValuePair; 163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<ValuePair, size_t> ValuePairWithDepth; 164de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair 165de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<std::multimap<Value *, Value *>::iterator, 166de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *>::iterator> VPIteratorPair; 167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator, 168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair>::iterator> 169de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair; 170de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 171de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasAnalysis *AA; 172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ScalarEvolution *SE; 173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel TargetData *TD; 174de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 175de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: const correct? 176de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 177de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool vectorizePairs(BasicBlock &BB); 178de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1795d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool getCandidatePairs(BasicBlock &BB, 1805d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator &Start, 181de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 182de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts); 183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 184de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs, 185de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs); 187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 188de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void buildDepMap(BasicBlock &BB, 189de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 190de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 191de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers); 192de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void choosePairs(std::multimap<Value *, Value *> &CandidatePairs, 194de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 196de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 197de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>& ChosenPairs); 198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 199de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void fuseChosenPairs(BasicBlock &BB, 200de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 201de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>& ChosenPairs); 202de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 203de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); 204de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool areInstsCompatible(Instruction *I, Instruction *J, 206de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool IsSimpleLoadStore); 207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool trackUsesOfI(DenseSet<Value *> &Users, 209de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker &WriteSet, Instruction *I, 210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, bool UpdateUsers = true, 211de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> *LoadMoveSet = 0); 2121230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void computePairsConnectedTo( 214de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 215de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 216de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 217de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePair P); 218de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 219de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool pairsConflict(ValuePair P, ValuePair Q, 220de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 221de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0); 222de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 223de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool pairWillFormCycle(ValuePair P, 224de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUsers, 225de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &CurrentPairs); 226de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 227de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void pruneTreeFor( 228de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 229de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 230de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 231de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 232de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 233de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 234de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, 235de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PrunedTree, ValuePair J, 236de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck); 237de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 238de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void buildInitialTreeFor( 239de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 240de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 241de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 242de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 243de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 244de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, ValuePair J); 245de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 246de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void findBestTreeFor( 247de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 248de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 250de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 252de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, 254de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t &BestEffSize, VPIteratorPair ChoiceRange, 255de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck); 256de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 257de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, 258de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, unsigned o, bool &FlipMemInputs); 259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 260de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void fillNewShuffleMask(LLVMContext& Context, Instruction *J, 261de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumElem, unsigned MaskOffset, unsigned NumInElem, 262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IdxOffset, std::vector<Constant*> &Mask); 263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I, 265de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J); 266de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 267de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementInput(LLVMContext& Context, Instruction *I, 268de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, unsigned o, bool FlipMemInputs); 269de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 270de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, 271de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, SmallVector<Value *, 3> &ReplacedOperands, 272de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool &FlipMemInputs); 273de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 274de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, 275de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, Instruction *K, 276de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, Instruction *&K1, 277de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&K2, bool &FlipMemInputs); 278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void collectPairLoadMoveSet(BasicBlock &BB, 280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 282de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I); 283de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 284de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void collectLoadMoveSet(BasicBlock &BB, 285de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 286de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 287de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet); 288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 289de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool canMoveUsesOfIAfterJ(BasicBlock &BB, 290de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J); 292de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 293de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void moveUsesOfIAfterJ(BasicBlock &BB, 294de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 295de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 296de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J); 297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 29887825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng bool vectorizeBB(BasicBlock &BB) { 299de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool changed = false; 300de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate a sufficient number of times to merge types of size 1 bit, 301de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // then 2 bits, then 4, etc. up to half of the target vector width of the 302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // target vector register. 303bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng for (unsigned v = 2, n = 1; 304bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter); 305de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel v *= 2, ++n) { 306bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng DEBUG(dbgs() << "BBV: fusing loop #" << n << 307de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " for " << BB.getName() << " in " << 308de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BB.getParent()->getName() << "...\n"); 309de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (vectorizePairs(BB)) 310de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel changed = true; 311de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 312de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 314de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 315de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: done!\n"); 316de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return changed; 317de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 318de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 31987825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng virtual bool runOnBasicBlock(BasicBlock &BB) { 32087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng AA = &getAnalysis<AliasAnalysis>(); 32187825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng SE = &getAnalysis<ScalarEvolution>(); 32287825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng TD = getAnalysisIfAvailable<TargetData>(); 32387825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 32487825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng return vectorizeBB(BB); 32587825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng } 32687825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 327de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel virtual void getAnalysisUsage(AnalysisUsage &AU) const { 328de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlockPass::getAnalysisUsage(AU); 329de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addRequired<AliasAnalysis>(); 330de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addRequired<ScalarEvolution>(); 331de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addPreserved<AliasAnalysis>(); 332de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addPreserved<ScalarEvolution>(); 3337e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel AU.setPreservesCFG(); 334de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 335de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 336de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This returns the vector type that holds a pair of the provided type. 337de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If the provided type is already a vector, then its length is doubled. 338de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel static inline VectorType *getVecTypeForPair(Type *ElemTy) { 339de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) { 340de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned numElem = VTy->getNumElements(); 341de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return VectorType::get(ElemTy->getScalarType(), numElem*2); 342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 3437e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel 3447e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel return VectorType::get(ElemTy, 2); 345de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 346de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the weight associated with the provided value. A chain of 348de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // candidate pairs has a length given by the sum of the weights of its 349de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // members (one weight per pair; the weight of each member of the pair 350de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is assumed to be the same). This length is then compared to the 351de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // chain-length threshold to determine if a given chain is significant 352de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // enough to be vectorized. The length is also used in comparing 353de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // candidate chains where longer chains are considered to be better. 354de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: when this function returns 0, the resulting instructions are 355de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // not actually fused. 356bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng inline size_t getDepthFactor(Value *V) { 357de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // InsertElement and ExtractElement have a depth factor of zero. This is 358de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // for two reasons: First, they cannot be usefully fused. Second, because 359de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the pass generates a lot of these, they can confuse the simple metric 360de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // used to compare the trees in the next iteration. Thus, giving them a 361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // weight of zero allows the pass to essentially ignore them in 362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // subsequent iterations when looking for vectorization opportunities 363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // while still tracking dependency chains that flow through those 364de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instructions. 365de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V)) 366de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return 0; 367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 368edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel // Give a load or store half of the required depth so that load/store 369edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel // pairs will vectorize. 370bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V))) 371bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng return Config.ReqChainDepth/2; 372edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel 373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return 1; 374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 376de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This determines the relative offset of two loads or stores, returning 377de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // true if the offset could be determined to be some constant value. 378de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For example, if OffsetInElmts == 1, then J accesses the memory directly 379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // after I; if OffsetInElmts == -1 then I accesses the memory 380de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // directly after J. This function assumes that both instructions 381de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // have the same type. 382de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool getPairPtrInfo(Instruction *I, Instruction *J, 383de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment, 384de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t &OffsetInElmts) { 385de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts = 0; 386de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<LoadInst>(I)) { 387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IPtr = cast<LoadInst>(I)->getPointerOperand(); 388de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel JPtr = cast<LoadInst>(J)->getPointerOperand(); 389de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IAlignment = cast<LoadInst>(I)->getAlignment(); 390de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel JAlignment = cast<LoadInst>(J)->getAlignment(); 391de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 392de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IPtr = cast<StoreInst>(I)->getPointerOperand(); 393de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel JPtr = cast<StoreInst>(J)->getPointerOperand(); 394de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IAlignment = cast<StoreInst>(I)->getAlignment(); 395de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel JAlignment = cast<StoreInst>(J)->getAlignment(); 396de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 397de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 398de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *IPtrSCEV = SE->getSCEV(IPtr); 399de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *JPtrSCEV = SE->getSCEV(JPtr); 400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 401de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If this is a trivial offset, then we'll get something like 402de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // 1*sizeof(type). With target data, which we need anyway, this will get 403de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // constant folded into a number. 404de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV); 405de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (const SCEVConstant *ConstOffSCEV = 406de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dyn_cast<SCEVConstant>(OffsetSCEV)) { 407de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantInt *IntOff = ConstOffSCEV->getValue(); 408de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t Offset = IntOff->getSExtValue(); 409de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 410de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VTy = cast<PointerType>(IPtr->getType())->getElementType(); 411de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy); 412de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 413de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel assert(VTy == cast<PointerType>(JPtr->getType())->getElementType()); 414de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 415de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts = Offset/VTyTSS; 416de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (abs64(Offset) % VTyTSS) == 0; 417de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 418de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 419de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 420de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 421de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 422de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if the provided CallInst represents an intrinsic that can 423de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // be vectorized. 424de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isVectorizableIntrinsic(CallInst* I) { 425de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Function *F = I->getCalledFunction(); 426de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!F) return false; 427de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 428de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IID = F->getIntrinsicID(); 429de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!IID) return false; 430de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 431de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel switch(IID) { 432de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel default: 433de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 434de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::sqrt: 435de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::powi: 436de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::sin: 437de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::cos: 438de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log: 439de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log2: 440de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log10: 441de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::exp: 442de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::exp2: 443de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::pow: 44486312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng return Config.VectorizeMath; 445de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::fma: 44686312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng return Config.VectorizeFMA; 447de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 448de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 449de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 450de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if J is the second element in some pair referenced by 451de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // some multimap pair iterator pair. 452de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel template <typename V> 453de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isSecondInIteratorPair(V J, std::pair< 454de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typename std::multimap<V, V>::iterator, 455de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typename std::multimap<V, V>::iterator> PairRange) { 456de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (typename std::multimap<V, V>::iterator K = PairRange.first; 457de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K != PairRange.second; ++K) 458de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (K->second == J) return true; 459de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 460de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 461de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 462de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel }; 463de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 464de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function implements one vectorization iteration on the provided 465de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // basic block. It returns true if the block is changed. 466de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::vectorizePairs(BasicBlock &BB) { 4675d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool ShouldContinue; 4685d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator Start = BB.getFirstInsertionPt(); 4695d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 4705d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::vector<Value *> AllPairableInsts; 4715d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseMap<Value *, Value *> AllChosenPairs; 4725d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 4735d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel do { 4745d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::vector<Value *> PairableInsts; 4755d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::multimap<Value *, Value *> CandidatePairs; 4765d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, 4775d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel PairableInsts); 4785d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (PairableInsts.empty()) continue; 4793706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 4805d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Now we have a map of all of the pairable instructions and we need to 4815d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // select the best possible pairing. A good pairing is one such that the 4825d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // users of the pair are also paired. This defines a (directed) forest 4835d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // over the pairs such that two pairs are connected iff the second pair 4845d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // uses the first. 4853706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 4865d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Note that it only matters that both members of the second pair use some 4875d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // element of the first pair (to allow for splatting). 4883706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 4895d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::multimap<ValuePair, ValuePair> ConnectedPairs; 4905d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs); 4915d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ConnectedPairs.empty()) continue; 4923706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 4935d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Build the pairable-instruction dependency map 4945d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseSet<ValuePair> PairableInstUsers; 4955d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers); 4963706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 49735564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // There is now a graph of the connected pairs. For each variable, pick 49835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // the pairing with the largest tree meeting the depth requirement on at 49935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // least one branch. Then select all pairings that are part of that tree 50035564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // and remove them from the list of available pairings and pairable 50135564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // variables. 5023706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 5035d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseMap<Value *, Value *> ChosenPairs; 5045d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel choosePairs(CandidatePairs, PairableInsts, ConnectedPairs, 5055d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel PairableInstUsers, ChosenPairs); 5063706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 5075d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ChosenPairs.empty()) continue; 5085d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(), 5095d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel PairableInsts.end()); 5105d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end()); 5115d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } while (ShouldContinue); 5125d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 5135d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (AllChosenPairs.empty()) return false; 5145d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel NumFusedOps += AllChosenPairs.size(); 5153706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 516de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A set of pairs has now been selected. It is now necessary to replace the 517de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // paired instructions with vector instructions. For this procedure each 51843ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop // operand must be replaced with a vector operand. This vector is formed 519de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // by using build_vector on the old operands. The replaced values are then 520de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // replaced with a vector_extract on the result. Subsequent optimization 521de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // passes should coalesce the build/extract combinations. 5223706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 5235d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs); 524de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 525de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 526de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 527de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function returns true if the provided instruction is capable of being 528de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // fused into a vector instruction. This determination is based only on the 529de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // type and other attributes of the instruction. 530de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::isInstVectorizable(Instruction *I, 531de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool &IsSimpleLoadStore) { 532de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = false; 533de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 534de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (CallInst *C = dyn_cast<CallInst>(I)) { 535de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isVectorizableIntrinsic(C)) 536de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 537de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (LoadInst *L = dyn_cast<LoadInst>(I)) { 538de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Vectorize simple loads if possbile: 539de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = L->isSimple(); 54086312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!IsSimpleLoadStore || !Config.VectorizeMemOps) 541de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 542de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (StoreInst *S = dyn_cast<StoreInst>(I)) { 543de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Vectorize simple stores if possbile: 544de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = S->isSimple(); 54586312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!IsSimpleLoadStore || !Config.VectorizeMemOps) 546de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 547de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (CastInst *C = dyn_cast<CastInst>(I)) { 548de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // We can vectorize casts, but not casts of pointer types, etc. 54986312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!Config.VectorizeCasts) 550de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 551de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 552de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *SrcTy = C->getSrcTy(); 553de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!SrcTy->isSingleValueType() || SrcTy->isPointerTy()) 554de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 555de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 556de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *DestTy = C->getDestTy(); 557de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!DestTy->isSingleValueType() || DestTy->isPointerTy()) 558de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 559fc3665c87519850f629c9565535e3be447e10addHal Finkel } else if (isa<SelectInst>(I)) { 560fc3665c87519850f629c9565535e3be447e10addHal Finkel if (!Config.VectorizeSelect) 561fc3665c87519850f629c9565535e3be447e10addHal Finkel return false; 562de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) || 563de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) { 564de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 565de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 566de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 567de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // We can't vectorize memory operations without target data 568de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (TD == 0 && IsSimpleLoadStore) 569de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 570de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 571de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *T1, *T2; 572de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<StoreInst>(I)) { 573de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For stores, it is the value type, not the pointer type that matters 574de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // because the value is what will come from a vector register. 575de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 576de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *IVal = cast<StoreInst>(I)->getValueOperand(); 577de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T1 = IVal->getType(); 578de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 579de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T1 = I->getType(); 580de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 581de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 582de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I->isCast()) 583de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T2 = cast<CastInst>(I)->getSrcTy(); 584de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 585de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T2 = T1; 586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 587de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Not every type can be vectorized... 588de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) || 589de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel !(VectorType::isValidElementType(T2) || T2->isVectorTy())) 590de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 591de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 59286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!Config.VectorizeInts 59386312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) 594de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 595de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 59686312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!Config.VectorizeFloats 59786312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) 598de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 599de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 600bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 || 601bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng T2->getPrimitiveSizeInBits() > Config.VectorBits/2) 602de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 603de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 604de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 605de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 606de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 607de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function returns true if the two provided instructions are compatible 608de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // (meaning that they can be fused into a vector instruction). This assumes 609de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that I has already been determined to be vectorizable and that J is not 610de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // in the use tree of I. 611de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, 612de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool IsSimpleLoadStore) { 613de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << 614de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << "\n"); 615de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 616de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Loads and stores can be merged if they have different alignments, 617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but are otherwise the same. 618de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LoadInst *LI, *LJ; 619de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel StoreInst *SI, *SJ; 620de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if ((LI = dyn_cast<LoadInst>(I)) && (LJ = dyn_cast<LoadInst>(J))) { 621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I->getType() != J->getType()) 622de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 624de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (LI->getPointerOperand()->getType() != 625de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LJ->getPointerOperand()->getType() || 626de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LI->isVolatile() != LJ->isVolatile() || 627de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LI->getOrdering() != LJ->getOrdering() || 628de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LI->getSynchScope() != LJ->getSynchScope()) 629bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng return false; 630de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) { 631de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (SI->getValueOperand()->getType() != 632de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SJ->getValueOperand()->getType() || 633de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SI->getPointerOperand()->getType() != 634de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SJ->getPointerOperand()->getType() || 635de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SI->isVolatile() != SJ->isVolatile() || 636de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SI->getOrdering() != SJ->getOrdering() || 637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SI->getSynchScope() != SJ->getSynchScope()) 638de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 639de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (!J->isSameOperationAs(I)) { 640de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 641de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 642de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: handle addsub-type operations! 643de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 644de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsSimpleLoadStore) { 645de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *IPtr, *JPtr; 646de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IAlignment, JAlignment; 647de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t OffsetInElmts = 0; 648de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, 649de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts) && abs64(OffsetInElmts) == 1) { 650bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.AlignedOnly) { 651de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *aType = isa<StoreInst>(I) ? 652de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cast<StoreInst>(I)->getValueOperand()->getType() : I->getType(); 653de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // An aligned load or store is possible only if the instruction 654de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // with the lower offset has an alignment suitable for the 655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vector type. 6561230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned BottomAlignment = IAlignment; 658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (OffsetInElmts < 0) BottomAlignment = JAlignment; 6591230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VType = getVecTypeForPair(aType); 661de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned VecAlignment = TD->getPrefTypeAlignment(VType); 662de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (BottomAlignment < VecAlignment) 663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 667de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (isa<ShuffleVectorInst>(I)) { 669de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Only merge two shuffles if they're both constant 670de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return isa<Constant>(I->getOperand(2)) && 671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel isa<Constant>(J->getOperand(2)); 672de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: We may want to vectorize non-constant shuffles also. 673de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 674de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 6756173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // The powi intrinsic is special because only the first argument is 6766173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // vectorized, the second arguments must be equal. 6776173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel CallInst *CI = dyn_cast<CallInst>(I); 6786173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Function *FI; 6796173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel if (CI && (FI = CI->getCalledFunction()) && 6806173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel FI->getIntrinsicID() == Intrinsic::powi) { 6816173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel 6826173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Value *A1I = CI->getArgOperand(1), 6836173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel *A1J = cast<CallInst>(J)->getArgOperand(1); 6846173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel const SCEV *A1ISCEV = SE->getSCEV(A1I), 6856173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel *A1JSCEV = SE->getSCEV(A1J); 6866173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel return (A1ISCEV == A1JSCEV); 6876173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } 6886173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel 689de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 690de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 691de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 692de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Figure out whether or not J uses I and update the users and write-set 693de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // structures associated with I. Specifically, Users represents the set of 694de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instructions that depend on I. WriteSet represents the set 695de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // of memory locations that are dependent on I. If UpdateUsers is true, 696de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // and J uses I, then Users is updated to contain J and WriteSet is updated 697de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to contain any memory locations to which J writes. The function returns 698de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // true if J uses I. By default, alias analysis is used to determine 699de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // whether J reads from memory that overlaps with a location in WriteSet. 700de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If LoadMoveSet is not null, then it is a previously-computed multimap 701de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // where the key is the memory-based user instruction and the value is 702de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the instruction to be compared with I. So, if LoadMoveSet is provided, 703de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // then the alias analysis is not used. This is necessary because this 704de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // function is called during the process of moving instructions during 705de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vectorization and the results of the alias analysis are not stable during 706de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that process. 707de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users, 708de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker &WriteSet, Instruction *I, 709de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, bool UpdateUsers, 710de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> *LoadMoveSet) { 711de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UsesI = false; 712de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 713de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This instruction may already be marked as a user due, for example, to 714de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // being a member of a selected pair. 715de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (Users.count(J)) 716de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = true; 717de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 718de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!UsesI) 7197e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel for (User::op_iterator JU = J->op_begin(), JE = J->op_end(); 7207e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel JU != JE; ++JU) { 721de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *V = *JU; 722de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I == V || Users.count(V)) { 723de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = true; 724de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 725de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 726de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 727de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!UsesI && J->mayReadFromMemory()) { 728de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (LoadMoveSet) { 729de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair JPairRange = LoadMoveSet->equal_range(J); 730de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = isSecondInIteratorPair<Value*>(I, JPairRange); 731de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 732de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (AliasSetTracker::iterator W = WriteSet.begin(), 733de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel WE = WriteSet.end(); W != WE; ++W) { 73438a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel if (W->aliasesUnknownInst(J, *AA)) { 73538a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel UsesI = true; 73638a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel break; 737de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 738de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 739de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 740de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 741de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 742de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI && UpdateUsers) { 743de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (J->mayWriteToMemory()) WriteSet.add(J); 744de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Users.insert(J); 745de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 746de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 747de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return UsesI; 748de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 749de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 750de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function iterates over all instruction pairs in the provided 751de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // basic block and collects all candidate pairs for vectorization. 7525d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool BBVectorize::getCandidatePairs(BasicBlock &BB, 7535d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator &Start, 754de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 755de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts) { 756de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock::iterator E = BB.end(); 7575d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (Start == E) return false; 7585d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 7595d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool ShouldContinue = false, IAfterStart = false; 7605d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel for (BasicBlock::iterator I = Start++; I != E; ++I) { 7615d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (I == Start) IAfterStart = true; 7625d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 763de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool IsSimpleLoadStore; 764de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isInstVectorizable(I, IsSimpleLoadStore)) continue; 765de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 766de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for an instruction with which to pair instruction *I... 767de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 768de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 7695d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool JAfterStart = IAfterStart; 7705d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator J = llvm::next(I); 771bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) { 7725d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (J == Start) JAfterStart = true; 7735d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 774de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Determine if J uses I, if so, exit the loop. 775bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep); 776bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.FastDep) { 777de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: For this heuristic to be effective, independent operations 778de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // must tend to be intermixed. This is likely to be true from some 779de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // kinds of grouped loop unrolling (but not the generic LLVM pass), 780de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but otherwise may require some kind of reordering pass. 781de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 782de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // When using fast dependency analysis, 783de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // stop searching after first use: 784de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI) break; 785de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 786de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI) continue; 787de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 788de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 789de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // J does not use I, and comes before the first use of I, so it can be 790de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // merged with I if the instructions are compatible. 791de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!areInstsCompatible(I, J, IsSimpleLoadStore)) continue; 792de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 793de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // J is a candidate for merging with I. 794de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!PairableInsts.size() || 795de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInsts[PairableInsts.size()-1] != I) { 796de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInsts.push_back(I); 797de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 7985d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 799de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.insert(ValuePair(I, J)); 8005d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 8015d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // The next call to this function must start after the last instruction 8025d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // selected during this invocation. 8035d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (JAfterStart) { 8045d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel Start = llvm::next(J); 8055d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel IAfterStart = JAfterStart = false; 8065d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } 8075d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 808de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair " 809de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *I << " <-> " << *J << "\n"); 8105d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 8115d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // If we have already found too many pairs, break here and this function 8125d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // will be called again starting after the last instruction selected 8135d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // during this invocation. 814bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (PairableInsts.size() >= Config.MaxInsts) { 8155d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel ShouldContinue = true; 8165d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel break; 8175d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } 818de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 8195d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 8205d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ShouldContinue) 8215d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel break; 822de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 823de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 824de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: found " << PairableInsts.size() 825de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << " instructions with candidate pairs\n"); 8265d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 8275d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel return ShouldContinue; 828de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 829de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 830de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Finds candidate pairs connected to the pair P = <PI, PJ>. This means that 831de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // it looks for pairs such that both members have an input which is an 832de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // output of PI or PJ. 833de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::computePairsConnectedTo( 834de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 835de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 836de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 837de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePair P) { 838de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For each possible pairing for this variable, look at the uses of 839de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the first value... 840de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator I = P.first->use_begin(), 841de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = P.first->use_end(); I != E; ++I) { 842de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); 843de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 844de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For each use of the first variable, look for uses of the second 845de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // variable... 846de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.second->use_begin(), 847de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = P.second->use_end(); J != E2; ++J) { 848de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair JPairRange = CandidatePairs.equal_range(*J); 849de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 850de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for <I, J>: 851de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isSecondInIteratorPair<Value*>(*J, IPairRange)) 852de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); 853de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 854de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for <J, I>: 855de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isSecondInIteratorPair<Value*>(*I, JPairRange)) 856de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I))); 857de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 858de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 859bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.SplatBreaksChain) continue; 860de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for cases where just the first value in the pair is used by 861de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both members of another pair (splatting). 862de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) { 863de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isSecondInIteratorPair<Value*>(*J, IPairRange)) 864de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); 865de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 866de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 867de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 868bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.SplatBreaksChain) return; 869de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for cases where just the second value in the pair is used by 870de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both members of another pair (splatting). 871de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator I = P.second->use_begin(), 872de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = P.second->use_end(); I != E; ++I) { 873de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); 874de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 875de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) { 876de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isSecondInIteratorPair<Value*>(*J, IPairRange)) 877de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); 878de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 879de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 880de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 881de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 882de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function figures out which pairs are connected. Two pairs are 883de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // connected if some output of the first pair forms an input to both members 884de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // of the second pair. 885de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::computeConnectedPairs( 886de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 887de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 888de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs) { 889de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 890de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator PI = PairableInsts.begin(), 891de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PE = PairableInsts.end(); PI != PE; ++PI) { 892de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI); 893de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 894de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator P = choiceRange.first; 895de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel P != choiceRange.second; ++P) 896de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel computePairsConnectedTo(CandidatePairs, PairableInsts, 897de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConnectedPairs, *P); 898de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 899de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 900de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size() 901de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << " pair connections.\n"); 902de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 903de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 904de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function builds a set of use tuples such that <A, B> is in the set 905de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // if B is in the use tree of A. If B is in the use tree of A, then B 906de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // depends on the output of A. 907de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::buildDepMap( 908de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock &BB, 909de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 910de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 911de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers) { 912de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> IsInPair; 913de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(), 914de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = CandidatePairs.end(); C != E; ++C) { 915de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsInPair.insert(C->first); 916de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsInPair.insert(C->second); 917de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 918de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 919de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate through the basic block, recording all Users of each 920de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pairable instruction. 921de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 922de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock::iterator E = BB.end(); 923de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { 924de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsInPair.find(I) == IsInPair.end()) continue; 925de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 926de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 927de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 928de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) 929de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) trackUsesOfI(Users, WriteSet, I, J); 930de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 931de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end(); 932de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel U != E; ++U) 933de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.insert(ValuePair(I, *U)); 934de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 935de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 936de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 937de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if an input to pair P is an output of pair Q and also an 938de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // input of pair Q is an output of pair P. If this is the case, then these 939de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // two pairs cannot be simultaneously fused. 940de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q, 941de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 942de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> *PairableInstUserMap) { 943de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Two pairs are in conflict if they are mutual Users of eachother. 944de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) || 945de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.first, Q.second)) || 946de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.second, Q.first)) || 947de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.second, Q.second)); 948de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) || 949de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.first, P.second)) || 950de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.second, P.first)) || 951de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.second, P.second)); 952de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (PairableInstUserMap) { 953de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: The expensive part of the cycle check is not so much the cycle 954de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // check itself but this edge insertion procedure. This needs some 955de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // profiling and probably a different data structure (same is true of 956de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // most uses of std::multimap). 957de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (PUsesQ) { 958de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair QPairRange = PairableInstUserMap->equal_range(Q); 959de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isSecondInIteratorPair(P, QPairRange)) 960de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUserMap->insert(VPPair(Q, P)); 961de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 962de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (QUsesP) { 963de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair PPairRange = PairableInstUserMap->equal_range(P); 964de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isSecondInIteratorPair(Q, PPairRange)) 965de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUserMap->insert(VPPair(P, Q)); 966de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 967de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 968de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 969de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (QUsesP && PUsesQ); 970de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 971de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 972de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function walks the use graph of current pairs to see if, starting 973de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // from P, the walk returns to P. 974de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::pairWillFormCycle(ValuePair P, 975de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 976de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &CurrentPairs) { 977de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCycleCheck) 978de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> " 979de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *P.second << "\n"); 980de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A lookup table of visisted pairs is kept because the PairableInstUserMap 981de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // contains non-direct associations. 982de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> Visited; 98335564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePair, 32> Q; 984de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 985de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(P); 98635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 98735564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel ValuePair QTop = Q.pop_back_val(); 988de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Visited.insert(QTop); 989de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 990de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCycleCheck) 991de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> " 992de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *QTop.second << "\n"); 993de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop); 994de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first; 995de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C != QPairRange.second; ++C) { 996de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C->second == P) { 997de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() 998de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << "BBV: rejected to prevent non-trivial cycle formation: " 999de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *C->first.first << " <-> " << *C->first.second << "\n"); 1000de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 1001de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1002de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 10030b2500c504156c45cd71817a9ef6749b6cde5703David Blaikie if (CurrentPairs.count(C->second) && !Visited.count(C->second)) 1004de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(C->second); 1005de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 100635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1007de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1008de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 1009de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1010de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1011de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function builds the initial tree of connected pairs with the 1012de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair J at the root. 1013de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::buildInitialTreeFor( 1014de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1015de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1016de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 1017de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1018de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1019de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, ValuePair J) { 1020de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Each of these pairs is viewed as the root node of a Tree. The Tree 1021de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is then walked (depth-first). As this happens, we keep track of 1022de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the pairs that compose the Tree and the maximum depth of the Tree. 102335564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePairWithDepth, 32> Q; 1024de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1025de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); 102635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 1027de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePairWithDepth QTop = Q.back(); 1028de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1029de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Push each child onto the queue: 1030de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool MoreChildren = false; 1031de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t MaxChildDepth = QTop.second; 1032de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first); 1033478eed85f96f0d93da43e26cfb7fc6dee981c9aaNAKAMURA Takumi for (std::multimap<ValuePair, ValuePair>::iterator k = qtRange.first; 1034de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel k != qtRange.second; ++k) { 1035de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Make sure that this child pair is still a candidate: 1036de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool IsStillCand = false; 1037de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair checkRange = 1038de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.equal_range(k->second.first); 1039de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator m = checkRange.first; 1040de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel m != checkRange.second; ++m) { 1041de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (m->second == k->second.second) { 1042de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsStillCand = true; 1043de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1044de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1045de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1046de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1047de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsStillCand) { 1048de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t>::iterator C = Tree.find(k->second); 1049de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C == Tree.end()) { 1050de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t d = getDepthFactor(k->second.first); 1051de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(k->second, QTop.second+d)); 1052de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MoreChildren = true; 1053de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1054de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxChildDepth = std::max(MaxChildDepth, C->second); 1055de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1056de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1057de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1058de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1059de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!MoreChildren) { 1060de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Record the current pair as part of the Tree: 1061de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth)); 1062de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.pop_back(); 1063de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 106435564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1065de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1066de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1067de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Given some initial tree, prune it by removing conflicting pairs (pairs 1068de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that cannot be simultaneously chosen for vectorization). 1069de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::pruneTreeFor( 1070de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1071de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1072de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 1073de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1074de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 1075de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1076de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, 1077de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PrunedTree, ValuePair J, 1078de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck) { 107935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePairWithDepth, 32> Q; 1080de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1081de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); 108235564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 108335564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel ValuePairWithDepth QTop = Q.pop_back_val(); 1084de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PrunedTree.insert(QTop.first); 1085de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1086de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Visit each child, pruning as necessary... 108743ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop DenseMap<ValuePair, size_t> BestChildren; 1088de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first); 1089478eed85f96f0d93da43e26cfb7fc6dee981c9aaNAKAMURA Takumi for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first; 1090de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K != QTopRange.second; ++K) { 1091de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t>::iterator C = Tree.find(K->second); 1092de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C == Tree.end()) continue; 1093de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1094de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This child is in the Tree, now we need to make sure it is the 1095de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // best of any conflicting children. There could be multiple 1096de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflicting children, so first, determine if we're keeping 1097de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // this child, then delete conflicting children as necessary. 1098de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1099de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // It is also necessary to guard against pairing-induced 1100de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // dependencies. Consider instructions a .. x .. y .. b 1101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // such that (a,b) are to be fused and (x,y) are to be fused 1102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but a is an input to x and b is an output from y. This 1103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // means that y cannot be moved after b but x must be moved 1104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // after b for (a,b) to be fused. In other words, after 1105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // fusing (a,b) we have y .. a/b .. x where y is an input 1106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to a/b and x is an output to a/b: x and y can no longer 1107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // be legally fused. To prevent this condition, we must 1108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // make sure that a child pair added to the Tree is not 1109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both an input and output of an already-selected pair. 1110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1111de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Pairing-induced dependencies can also form from more complicated 1112de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // cycles. The pair vs. pair conflicts are easy to check, and so 1113de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that is done explicitly for "fast rejection", and because for 1114de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // child vs. child conflicts, we may prefer to keep the current 1115de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair in preference to the already-selected child. 1116de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> CurrentPairs; 1117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1118de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool CanAdd = true; 1119de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<ValuePair, size_t>::iterator C2 112043ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(), E2 = BestChildren.end(); 1121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2 != E2; ++C2) { 1122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1126de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers, 1127de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1128de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->second >= C->second) { 1129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1133de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(C2->first); 1134de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1135de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1136de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1137de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Even worse, this child could conflict with another node already 1139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // selected for the Tree. If that is the case, ignore this child. 1140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<ValuePair>::iterator T = PrunedTree.begin(), 1141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = PrunedTree.end(); T != E2; ++T) { 1142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (T->first == C->first.first || 1143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->first == C->first.second || 1144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->second == C->first.first || 1145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->second == C->first.second || 1146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(*T, C->first, PairableInstUsers, 1147de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1148de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1149de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1150de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(*T); 1153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // And check the queue too... 115735564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel for (SmallVector<ValuePairWithDepth, 32>::iterator C2 = Q.begin(), 1158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = Q.end(); C2 != E2; ++C2) { 1159de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1162de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers, 1164de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1165de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1166de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1169de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(C2->first); 1170de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1171de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Last but not least, check for a conflict with any of the 1174de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // already-chosen pairs. 1175de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator C2 = 1176de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.begin(), E2 = ChosenPairs.end(); 1177de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2 != E2; ++C2) { 1178de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (pairsConflict(*C2, C->first, PairableInstUsers, 1179de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1180de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1181de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1182de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1184de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(*C2); 1185de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 11881230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // To check for non-trivial cycles formed by the addition of the 11891230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // current pair we've formed a list of all relevant pairs, now use a 11901230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // graph walk to check for a cycle. We start from the current pair and 11911230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // walk the use tree to see if we again reach the current pair. If we 11921230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // do, then the current pair is rejected. 1193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1194de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: It may be more efficient to use a topological-ordering 1195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // algorithm to improve the cycle check. This should be investigated. 1196de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UseCycleCheck && 1197de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs)) 1198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1199de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1200de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This child can be added, but we may have chosen it in preference 1201de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to an already-selected child. Check for this here, and if a 1202de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflict is found, then remove the previously-selected child 1203de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // before adding this one in its place. 1204de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<ValuePair, size_t>::iterator C2 120543ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(); C2 != BestChildren.end();) { 1206de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1209de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers)) 121143ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop BestChildren.erase(C2++); 1212de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 1213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++C2; 1214de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1215de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 121643ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop BestChildren.insert(ValuePairWithDepth(C->first, C->second)); 1217de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1218de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1219de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<ValuePair, size_t>::iterator C 122043ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(), E2 = BestChildren.end(); 1221de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C != E2; ++C) { 1222de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t DepthF = getDepthFactor(C->first.first); 1223de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(C->first, QTop.second+DepthF)); 1224de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 122535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1226de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1227de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1228de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function finds the best tree of mututally-compatible connected 1229de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pairs, given the choice of root pairs as an iterator range. 1230de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::findBestTreeFor( 1231de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1232de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1233de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 1234de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1235de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 1236de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1237de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, 1238de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t &BestEffSize, VPIteratorPair ChoiceRange, 1239de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck) { 1240de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first; 1241de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel J != ChoiceRange.second; ++J) { 1242de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1243de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Before going any further, make sure that this pair does not 1244de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflict with any already-selected pairs (see comment below 1245de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // near the Tree pruning for more details). 1246de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> ChosenPairSet; 1247de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool DoesConflict = false; 1248de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(), 1249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = ChosenPairs.end(); C != E; ++C) { 1250de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (pairsConflict(*C, *J, PairableInstUsers, 1251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1252de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DoesConflict = true; 1253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1254de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1255de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1256de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairSet.insert(*C); 1257de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1258de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (DoesConflict) continue; 1259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1260de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UseCycleCheck && 1261de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairWillFormCycle(*J, PairableInstUserMap, ChosenPairSet)) 1262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> Tree; 1265de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel buildInitialTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, 1266de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers, ChosenPairs, Tree, *J); 1267de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1268de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Because we'll keep the child with the largest depth, the largest 1269de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // depth is still the same in the unpruned Tree. 1270de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t MaxDepth = Tree.lookup(*J); 1271de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1272de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {" 1273de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *J->first << " <-> " << *J->second << "} of depth " << 1274de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxDepth << " and size " << Tree.size() << "\n"); 1275de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1276de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // At this point the Tree has been constructed, but, may contain 1277de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // contradictory children (meaning that different children of 1278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // some tree node may be attempting to fuse the same instruction). 1279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // So now we walk the tree again, in the case of a conflict, 1280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // keep only the child with the largest depth. To break a tie, 1281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // favor the first child. 1282de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1283de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> PrunedTree; 1284de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, 1285de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree, 1286de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PrunedTree, *J, UseCycleCheck); 1287de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t EffSize = 0; 1289de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), 1290de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = PrunedTree.end(); S != E; ++S) 1291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel EffSize += getDepthFactor(S->first); 1292de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1293de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugPairSelection) 1294de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: found pruned Tree for pair {" 1295de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *J->first << " <-> " << *J->second << "} of depth " << 1296de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxDepth << " and size " << PrunedTree.size() << 1297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " (effective size: " << EffSize << ")\n"); 1298bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) { 1299de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestMaxDepth = MaxDepth; 1300de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestEffSize = EffSize; 1301de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestTree = PrunedTree; 1302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1303de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1304de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1305de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1306de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Given the list of candidate pairs, this function selects those 1307de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that will be fused into vector instructions. 1308de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::choosePairs( 1309de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1310de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1311de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 1312de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>& ChosenPairs) { 1314bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng bool UseCycleCheck = 1315bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck; 1316de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> PairableInstUserMap; 1317de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator I = PairableInsts.begin(), 1318de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = PairableInsts.end(); I != E; ++I) { 1319de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The number of possible pairings for this variable: 1320de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t NumChoices = CandidatePairs.count(*I); 1321de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!NumChoices) continue; 1322de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1323de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I); 1324de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1325de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The best pair to choose and its tree: 1326de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t BestMaxDepth = 0, BestEffSize = 0; 1327de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> BestTree; 1328de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel findBestTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, 1329de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers, PairableInstUserMap, ChosenPairs, 1330de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestTree, BestMaxDepth, BestEffSize, ChoiceRange, 1331de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck); 1332de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1333de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A tree has been chosen (or not) at this point. If no tree was 1334de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // chosen, then this instruction, I, cannot be paired (and is no longer 1335de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // considered). 1336de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1337de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (BestTree.size() > 0) 1338de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: selected pairs in the best tree for: " 1339de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *cast<Instruction>(*I) << "\n"); 1340de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1341de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<ValuePair>::iterator S = BestTree.begin(), 1342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE2 = BestTree.end(); S != SE2; ++S) { 1343de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Insert the members of this tree into the list of chosen pairs. 1344de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.insert(ValuePair(S->first, S->second)); 1345de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " << 1346de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel *S->second << "\n"); 1347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1348de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Remove all candidate pairs that have values in the chosen tree. 1349de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator K = 1350de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.begin(); K != CandidatePairs.end();) { 1351de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (K->first == S->first || K->second == S->first || 1352de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->second == S->second || K->first == S->second) { 1353de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Don't remove the actual pair chosen so that it can be used 1354de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // in subsequent tree selections. 1355de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!(K->first == S->first && K->second == S->second)) 1356de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.erase(K++); 1357de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 1358de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++K; 1359de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1360de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++K; 1361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1364de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1365de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1366de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n"); 1367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1368de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1369de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::string getReplacementName(Instruction *I, bool IsInput, unsigned o, 1370de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned n = 0) { 1371de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!I->hasName()) 1372de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return ""; 1373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) + 1375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (n > 0 ? "." + utostr(n) : "")).str(); 1376de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1377de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1378de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value that is to be used as the pointer input to the vector 1379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instruction that fuses I with J. 1380de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, 1381de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J, unsigned o, 1382de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool &FlipMemInputs) { 1383de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *IPtr, *JPtr; 1384de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IAlignment, JAlignment; 1385de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t OffsetInElmts; 1386de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, 1387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts); 1388de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1389de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The pointer value is taken to be the one with the lowest offset. 1390de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *VPtr; 1391de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (OffsetInElmts > 0) { 1392de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPtr = IPtr; 1393de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1394de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlipMemInputs = true; 1395de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPtr = JPtr; 1396de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1397de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1398de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *ArgType = cast<PointerType>(IPtr->getType())->getElementType(); 1399de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VArgType = getVecTypeForPair(ArgType); 1400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VArgPtrType = PointerType::get(VArgType, 1401de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cast<PointerType>(IPtr->getType())->getAddressSpace()); 1402de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o), 1403de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel /* insert before */ FlipMemInputs ? J : I); 1404de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1405de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1406de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J, 1407de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumElem, unsigned MaskOffset, unsigned NumInElem, 1408de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IdxOffset, std::vector<Constant*> &Mask) { 1409de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned v = 0; v < NumElem/2; ++v) { 1410de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int m = cast<ShuffleVectorInst>(J)->getMaskValue(v); 1411de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (m < 0) { 1412de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context)); 1413de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1414de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned mm = m + (int) IdxOffset; 1415de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (m >= (int) NumInElem) 1416de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel mm += (int) NumInElem; 1417de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1418de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[v+MaskOffset] = 1419de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantInt::get(Type::getInt32Ty(Context), mm); 1420de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1421de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1422de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1423de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1424de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value that is to be used as the vector-shuffle mask to the 1425de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vector instruction that fuses I with J. 1426de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context, 1427de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 1428de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This is the shuffle mask. We need to append the second 1429de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // mask to the first, and the numbers need to be adjusted. 1430de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1431de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *ArgType = I->getType(); 1432de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VArgType = getVecTypeForPair(ArgType); 1433de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1434de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Get the total number of elements in the fused vector type. 1435de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // By definition, this must equal the number of elements in 1436de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the final mask. 1437de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumElem = cast<VectorType>(VArgType)->getNumElements(); 1438de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Constant*> Mask(NumElem); 1439de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1440de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *OpType = I->getOperand(0)->getType(); 1441de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumInElem = cast<VectorType>(OpType)->getNumElements(); 1442de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1443de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For the mask from the first pair... 1444de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel fillNewShuffleMask(Context, I, NumElem, 0, NumInElem, 0, Mask); 1445de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1446de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For the mask from the second pair... 1447de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel fillNewShuffleMask(Context, J, NumElem, NumElem/2, NumInElem, NumInElem, 1448de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask); 1449de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1450de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return ConstantVector::get(Mask); 1451de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1452de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1453de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value to be used as the specified operand of the vector 1454de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instruction that fuses I with J. 1455de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I, 1456de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, unsigned o, bool FlipMemInputs) { 1457de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); 1458de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); 1459de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1460de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Compute the fused vector type for this operand 1461de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *ArgType = I->getOperand(o)->getType(); 1462de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VectorType *VArgType = getVecTypeForPair(ArgType); 1463de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1464de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *L = I, *H = J; 1465de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (FlipMemInputs) { 1466de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel L = J; 1467de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel H = I; 1468de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1469de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1470de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (ArgType->isVectorTy()) { 1471de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned numElem = cast<VectorType>(VArgType)->getNumElements(); 1472de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Constant*> Mask(numElem); 1473de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned v = 0; v < numElem; ++v) 1474de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 1475de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1476de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *BV = new ShuffleVectorInst(L->getOperand(o), 1477de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel H->getOperand(o), 1478de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantVector::get(Mask), 1479de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(I, true, o)); 1480de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BV->insertBefore(J); 1481de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV; 1482de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1483de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1484de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If these two inputs are the output of another vector instruction, 1485de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // then we should use that output directly. It might be necessary to 1486de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // permute it first. [When pairings are fused recursively, you can 1487de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // end up with cases where a large vector is decomposed into scalars 1488de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // using extractelement instructions, then built into size-2 1489de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vectors using insertelement and the into larger vectors using 1490de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // shuffles. InstCombine does not simplify all of these cases well, 1491de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // and so we make sure that shuffles are generated here when possible. 1492de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ExtractElementInst *LEE 1493de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel = dyn_cast<ExtractElementInst>(L->getOperand(o)); 1494de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ExtractElementInst *HEE 1495de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel = dyn_cast<ExtractElementInst>(H->getOperand(o)); 1496de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1497de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (LEE && HEE && 1498de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LEE->getOperand(0)->getType() == HEE->getOperand(0)->getType()) { 1499de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VectorType *EEType = cast<VectorType>(LEE->getOperand(0)->getType()); 1500de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned LowIndx = cast<ConstantInt>(LEE->getOperand(1))->getZExtValue(); 1501de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned HighIndx = cast<ConstantInt>(HEE->getOperand(1))->getZExtValue(); 1502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (LEE->getOperand(0) == HEE->getOperand(0)) { 1503de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (LowIndx == 0 && HighIndx == 1) 1504de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return LEE->getOperand(0); 15051230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 1506de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Constant*> Mask(2); 1507de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx); 1508de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx); 1509de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0), 1511de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UndefValue::get(EEType), 1512de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantVector::get(Mask), 1513de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(I, true, o)); 1514de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BV->insertBefore(J); 1515de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV; 1516de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1517de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1518de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Constant*> Mask(2); 1519de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel HighIndx += EEType->getNumElements(); 1520de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx); 1521de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx); 1522de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1523de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0), 1524de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel HEE->getOperand(0), 1525de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantVector::get(Mask), 1526de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(I, true, o)); 1527de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BV->insertBefore(J); 1528de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV; 1529de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1530de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1531de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *BV1 = InsertElementInst::Create( 1532de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UndefValue::get(VArgType), 1533de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel L->getOperand(o), CV0, 1534de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(I, true, o, 1)); 1535de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BV1->insertBefore(I); 1536de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *BV2 = InsertElementInst::Create(BV1, H->getOperand(o), 1537de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CV1, 1538de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(I, true, o, 2)); 1539de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BV2->insertBefore(J); 1540de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV2; 1541de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1542de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1543de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function creates an array of values that will be used as the inputs 1544de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to the vector instruction that fuses I with J. 1545de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, 1546de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J, 1547de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SmallVector<Value *, 3> &ReplacedOperands, 1548de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool &FlipMemInputs) { 1549de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlipMemInputs = false; 1550de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumOperands = I->getNumOperands(); 1551de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1552de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { 1553de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate backward so that we look at the store pointer 1554de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // first and know whether or not we need to flip the inputs. 1555de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1556de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) { 1557de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This is the pointer for a load/store instruction. 1558de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o, 1559de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlipMemInputs); 1560de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 15616173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } else if (isa<CallInst>(I)) { 1562de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Function *F = cast<CallInst>(I)->getCalledFunction(); 1563de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IID = F->getIntrinsicID(); 15646173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel if (o == NumOperands-1) { 15656173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel BasicBlock &BB = *I->getParent(); 1566bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 15676173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Module *M = BB.getParent()->getParent(); 15686173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Type *ArgType = I->getType(); 15696173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Type *VArgType = getVecTypeForPair(ArgType); 1570bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 15716173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // FIXME: is it safe to do this here? 15726173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel ReplacedOperands[o] = Intrinsic::getDeclaration(M, 15736173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel (Intrinsic::ID) IID, VArgType); 15746173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel continue; 15756173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } else if (IID == Intrinsic::powi && o == 1) { 15766173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // The second argument of powi is a single integer and we've already 15776173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // checked that both arguments are equal. As a result, we just keep 15786173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // I's second argument. 15796173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel ReplacedOperands[o] = I->getOperand(o); 15806173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel continue; 15816173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } 1582de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) { 1583de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J); 1584de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1585de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1587de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ReplacedOperands[o] = 1588de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementInput(Context, I, J, o, FlipMemInputs); 1589de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1590de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1591de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1592de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function creates two values that represent the outputs of the 1593de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // original I and J instructions. These are generally vector shuffles 1594de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // or extracts. In many cases, these will end up being unused and, thus, 1595de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // eliminated by later passes. 1596de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I, 1597de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, Instruction *K, 1598de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 1599de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&K1, Instruction *&K2, 1600de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool &FlipMemInputs) { 1601de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); 1602de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); 1603de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1604de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<StoreInst>(I)) { 1605de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(I, K); 1606de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(J, K); 1607de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1608de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *IType = I->getType(); 1609de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VType = getVecTypeForPair(IType); 1610de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1611de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IType->isVectorTy()) { 1612de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned numElem = cast<VectorType>(IType)->getNumElements(); 1613de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Constant*> Mask1(numElem), Mask2(numElem); 1614de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned v = 0; v < numElem; ++v) { 1615de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 1616de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElem+v); 1617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1618de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1619de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K1 = new ShuffleVectorInst(K, UndefValue::get(VType), 1620de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantVector::get( 1621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlipMemInputs ? Mask2 : Mask1), 1622de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 1)); 1623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K2 = new ShuffleVectorInst(K, UndefValue::get(VType), 1624de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantVector::get( 1625de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlipMemInputs ? Mask1 : Mask2), 1626de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 2)); 1627de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1628de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K1 = ExtractElementInst::Create(K, FlipMemInputs ? CV1 : CV0, 1629de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 1)); 1630de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K2 = ExtractElementInst::Create(K, FlipMemInputs ? CV0 : CV1, 1631de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 2)); 1632de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1633de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1634de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K1->insertAfter(K); 1635de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K2->insertAfter(K1); 1636de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InsertionPt = K2; 1637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1638de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1639de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1640de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move all uses of the function I (including pairing-induced uses) after J. 1641de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB, 1642de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 1643de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 1644de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 1645ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 1646de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1647de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 1648de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 1649de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (; cast<Instruction>(L) != J; ++L) 1650de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet); 1651de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1652de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel assert(cast<Instruction>(L) == J && 1653de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel "Tracking has not proceeded far enough to check for dependencies"); 1654de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If J is now in the use set of I, then trackUsesOfI will return true 1655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // and we have a dependency cycle (and the fusing operation must abort). 1656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSet); 1657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move all uses of the function I (including pairing-induced uses) after J. 1660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB, 1661de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 1662de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 1663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 1664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 1665ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 1666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1667de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 1668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 1669de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (; cast<Instruction>(L) != J;) { 1670de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet)) { 1671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move this instruction 1672de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *InstToMove = L; ++L; 1673de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1674de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: moving: " << *InstToMove << 1675de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " to after " << *InsertionPt << "\n"); 1676de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InstToMove->removeFromParent(); 1677de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InstToMove->insertAfter(InsertionPt); 1678de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InsertionPt = InstToMove; 1679de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1680de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++L; 1681de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1682de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1683de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1684de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1685de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Collect all load instruction that are in the move set of a given first 1686de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair member. These loads depend on the first instruction, I, and so need 1687de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to be moved after J (the second instruction) when the pair is fused. 1688de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB, 1689de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1690de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 1691de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I) { 1692de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 1693ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 1694de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1695de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 1696de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 1697de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1698de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: We cannot end the loop when we reach J because J could be moved 1699de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // farther down the use chain by another instruction pairing. Also, J 1700de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // could be before I if this is an inverted input. 1701de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) { 1702de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (trackUsesOfI(Users, WriteSet, I, L)) { 1703de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (L->mayReadFromMemory()) 1704de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LoadMoveSet.insert(ValuePair(L, I)); 1705de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1706de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1707de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1708de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1709de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // In cases where both load/stores and the computation of their pointers 1710de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // are chosen for vectorization, we can end up in a situation where the 1711de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // aliasing analysis starts returning different query results as the 1712de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // process of fusing instruction pairs continues. Because the algorithm 1713de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // relies on finding the same use trees here as were found earlier, we'll 1714de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // need to precompute the necessary aliasing information here and then 1715de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // manually update it during the fusion process. 1716de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::collectLoadMoveSet(BasicBlock &BB, 1717de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1718de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1719de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet) { 1720de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator PI = PairableInsts.begin(), 1721de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PIE = PairableInsts.end(); PI != PIE; ++PI) { 1722de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI); 1723de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (P == ChosenPairs.end()) continue; 1724de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1725de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I = cast<Instruction>(P->first); 1726de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, I); 1727de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1728de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1729de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1730de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function fuses the chosen instruction pairs into vector instructions, 1731de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // taking care preserve any needed scalar outputs and, then, it reorders the 1732de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // remaining instructions as needed (users of the first member of the pair 1733de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // need to be moved to after the location of the second member of the pair 1734de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // because the vector instruction is inserted in the location of the pair's 1735de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // second member). 1736de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::fuseChosenPairs(BasicBlock &BB, 1737de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1738de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs) { 1739de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LLVMContext& Context = BB.getContext(); 1740de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1741de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // During the vectorization process, the order of the pairs to be fused 1742de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // could be flipped. So we'll add each pair, flipped, into the ChosenPairs 1743de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // list. After a pair is fused, the flipped pair is removed from the list. 1744de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<ValuePair> FlippedPairs; 1745de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlippedPairs.reserve(ChosenPairs.size()); 1746de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(), 1747de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = ChosenPairs.end(); P != E; ++P) 1748de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlippedPairs.push_back(ValuePair(P->second, P->first)); 1749de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<ValuePair>::iterator P = FlippedPairs.begin(), 1750de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = FlippedPairs.end(); P != E; ++P) 1751de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.insert(*P); 1752de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1753de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> LoadMoveSet; 1754de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); 1755de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1756de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); 1757de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1758de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { 1759de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(PI); 1760de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (P == ChosenPairs.end()) { 1761de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 1762de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1763de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1764de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1765de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (getDepthFactor(P->first) == 0) { 1766de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // These instructions are not really fused, but are tracked as though 1767de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // they are. Any case in which it would be interesting to fuse them 1768de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // will be taken care of by InstCombine. 1769de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel --NumFusedOps; 1770de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 1771de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1772de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1773de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1774de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I = cast<Instruction>(P->first), 1775de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel *J = cast<Instruction>(P->second); 1776de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1777de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: fusing: " << *I << 1778de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << "\n"); 1779de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1780de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Remove the pair and flipped pair from the list. 1781de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator FP = ChosenPairs.find(P->second); 1782de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel assert(FP != ChosenPairs.end() && "Flipped pair not found in list"); 1783de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.erase(FP); 1784de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.erase(P); 1785de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1786de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!canMoveUsesOfIAfterJ(BB, LoadMoveSet, I, J)) { 1787de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: fusion of: " << *I << 1788de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << 1789de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " aborted because of non-trivial dependency cycle\n"); 1790de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel --NumFusedOps; 1791de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 1792de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1793de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1794de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1795de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool FlipMemInputs; 1796de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumOperands = I->getNumOperands(); 1797de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SmallVector<Value *, 3> ReplacedOperands(NumOperands); 1798de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementInputsForPair(Context, I, J, ReplacedOperands, 1799de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlipMemInputs); 1800de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1801de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Make a copy of the original operation, change its type to the vector 1802de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // type and replace its operands with the vector operands. 1803de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *K = I->clone(); 1804de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I->hasName()) K->takeName(I); 1805de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1806de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isa<StoreInst>(K)) 1807de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->mutateType(getVecTypeForPair(I->getType())); 1808de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1809de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned o = 0; o < NumOperands; ++o) 1810de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->setOperand(o, ReplacedOperands[o]); 1811de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1812de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If we've flipped the memory inputs, make sure that we take the correct 1813de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // alignment. 1814de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (FlipMemInputs) { 1815de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<StoreInst>(K)) 1816de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cast<StoreInst>(K)->setAlignment(cast<StoreInst>(J)->getAlignment()); 1817de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 1818de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cast<LoadInst>(K)->setAlignment(cast<LoadInst>(J)->getAlignment()); 1819de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1820de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1821de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->insertAfter(J); 1822de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1823de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Instruction insertion point: 1824de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *InsertionPt = K; 1825de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *K1 = 0, *K2 = 0; 1826de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel replaceOutputsOfPair(Context, I, J, K, InsertionPt, K1, K2, 1827de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlipMemInputs); 1828de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1829de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The use tree of the first original instruction must be moved to after 1830de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the location of the second instruction. The entire use tree of the 1831de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // first instruction is disjoint from the input tree of the second 1832de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // (by definition), and so commutes with it. 1833de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1834de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J); 1835de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1836de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isa<StoreInst>(I)) { 1837de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel I->replaceAllUsesWith(K1); 1838de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel J->replaceAllUsesWith(K2); 1839de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(I, K1); 1840de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(J, K2); 1841de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1842de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1843de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Instructions that may read from memory may be in the load move set. 1844de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Once an instruction is fused, we no longer need its move set, and so 1845de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the values of the map never need to be updated. However, when a load 1846de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is fused, we need to merge the entries from both instructions in the 1847de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair in case those instructions were in the move set of some other 1848de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // yet-to-be-fused pair. The loads in question are the keys of the map. 1849de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I->mayReadFromMemory()) { 1850de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<ValuePair> NewSetMembers; 1851de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair IPairRange = LoadMoveSet.equal_range(I); 1852de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair JPairRange = LoadMoveSet.equal_range(J); 1853de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator N = IPairRange.first; 1854de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel N != IPairRange.second; ++N) 1855de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel NewSetMembers.push_back(ValuePair(K, N->second)); 1856de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator N = JPairRange.first; 1857de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel N != JPairRange.second; ++N) 1858de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel NewSetMembers.push_back(ValuePair(K, N->second)); 1859de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(), 1860de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AE = NewSetMembers.end(); A != AE; ++A) 1861de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LoadMoveSet.insert(*A); 1862de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1863de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1864de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Before removing I, set the iterator to the next instruction. 1865de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PI = llvm::next(BasicBlock::iterator(I)); 1866de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (cast<Instruction>(PI) == J) 1867de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 1868de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1869de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE->forgetValue(I); 1870de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE->forgetValue(J); 1871de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel I->eraseFromParent(); 1872de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel J->eraseFromParent(); 1873de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1874de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1875de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: final: \n" << BB << "\n"); 1876de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1877de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel} 1878de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1879de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelchar BBVectorize::ID = 0; 1880de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic const char bb_vectorize_name[] = "Basic-Block Vectorization"; 1881de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) 1882de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 1883de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 1884de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) 1885de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1886bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengBasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) { 1887bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng return new BBVectorize(C); 1888de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel} 1889de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1890bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengbool 1891bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengllvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) { 1892bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize BBVectorizer(P, C); 189387825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng return BBVectorizer.vectorizeBB(BB); 189487825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng} 1895bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 1896bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng//===----------------------------------------------------------------------===// 1897bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengVectorizeConfig::VectorizeConfig() { 1898bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng VectorBits = ::VectorBits; 189986312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeInts = !::NoInts; 190086312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeFloats = !::NoFloats; 190186312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeCasts = !::NoCasts; 190286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeMath = !::NoMath; 190386312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeFMA = !::NoFMA; 1904fc3665c87519850f629c9565535e3be447e10addHal Finkel VectorizeSelect = !::NoSelect; 190586312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeMemOps = !::NoMemOps; 1906bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng AlignedOnly = ::AlignedOnly; 1907bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng ReqChainDepth= ::ReqChainDepth; 1908bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng SearchLimit = ::SearchLimit; 1909bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck; 1910bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng SplatBreaksChain = ::SplatBreaksChain; 1911bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxInsts = ::MaxInsts; 1912bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxIter = ::MaxIter; 1913bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng NoMemOpBoost = ::NoMemOpBoost; 1914bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng FastDep = ::FastDep; 1915bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng} 1916