//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a basic-block vectorization pass. The algorithm was
// inspired by that used by the Vienna MAP Vectorizer by Franchetti and Kral,
// et al. It works by looking for chains of pairable operations and then
// pairing them.
14de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// 15de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//===----------------------------------------------------------------------===// 16de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 17de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define BBV_NAME "bb-vectorize" 18de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define DEBUG_TYPE BBV_NAME 19de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Constants.h" 20de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/DerivedTypes.h" 21de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Function.h" 22de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Instructions.h" 23de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/IntrinsicInst.h" 24de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Intrinsics.h" 25de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/LLVMContext.h" 26ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel#include "llvm/Metadata.h" 27de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Pass.h" 28de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Type.h" 29de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseMap.h" 30de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseSet.h" 31de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/SmallVector.h" 32de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/Statistic.h" 33de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/STLExtras.h" 34de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/StringExtras.h" 35de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasAnalysis.h" 36de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasSetTracker.h" 37de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolution.h" 
38de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolutionExpressions.h" 39de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ValueTracking.h" 40de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/CommandLine.h" 41de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/Debug.h" 42de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/raw_ostream.h" 43de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/ValueHandle.h" 44de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Target/TargetData.h" 4564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel#include "llvm/Transforms/Utils/Local.h" 46de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Transforms/Vectorize.h" 47de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include <algorithm> 48de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include <map> 49de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelusing namespace llvm; 50de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 51de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 52de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden, 53de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The required chain depth for vectorization")); 54de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 55de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 56de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden, 57de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The maximum search distance for instruction pairs")); 58de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 59de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 60de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSplatBreaksChain("bb-vectorize-splat-breaks-chain", 
cl::init(false), cl::Hidden, 61de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Replicating one element to a pair breaks the chain")); 62de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 63de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 64de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelVectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden, 65de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The size of the native vector registers")); 66de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 67de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 68de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, 69de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The maximum number of pairing iterations")); 70de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 7164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkelstatic cl::opt<bool> 7264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal FinkelPow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden, 7364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cl::desc("Don't try to form non-2^n-length vectors")); 7464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 75de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 765d4e18bc39fea892f523d960213906d296d3cb38Hal FinkelMaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, 775d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel cl::desc("The maximum number of pairable instructions per group")); 785d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 795d4e18bc39fea892f523d960213906d296d3cb38Hal Finkelstatic cl::opt<unsigned> 80de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), 81de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use" 
82de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " a full cycle check")); 83de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 84de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 85768edf3cd037aab10391abc279f71470df8e3156Hal FinkelNoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden, 86768edf3cd037aab10391abc279f71470df8e3156Hal Finkel cl::desc("Don't try to vectorize boolean (i1) values")); 87768edf3cd037aab10391abc279f71470df8e3156Hal Finkel 88768edf3cd037aab10391abc279f71470df8e3156Hal Finkelstatic cl::opt<bool> 89de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden, 90de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize integer values")); 91de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 92de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 93de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden, 94de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize floating-point values")); 95de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 96de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 97f3f5a1e6f77a842ccb24cc81766437da5197d712Hal FinkelNoPointers("bb-vectorize-no-pointers", cl::init(false), cl::Hidden, 98f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel cl::desc("Don't try to vectorize pointer values")); 99f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 100f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkelstatic cl::opt<bool> 101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden, 102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize casting (conversion) operations")); 103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 
105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden, 106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize floating-point math intrinsics")); 107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, 110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize the fused-multiply-add intrinsic")); 111de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 112de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 113fc3665c87519850f629c9565535e3be447e10addHal FinkelNoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, 114fc3665c87519850f629c9565535e3be447e10addHal Finkel cl::desc("Don't try to vectorize select instructions")); 115fc3665c87519850f629c9565535e3be447e10addHal Finkel 116fc3665c87519850f629c9565535e3be447e10addHal Finkelstatic cl::opt<bool> 117e415f96b6a43ac8861148a11a4258bc38c247e8fHal FinkelNoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden, 118e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel cl::desc("Don't try to vectorize comparison instructions")); 119e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel 120e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkelstatic cl::opt<bool> 121f3f5a1e6f77a842ccb24cc81766437da5197d712Hal FinkelNoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, 122f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel cl::desc("Don't try to vectorize getelementptr instructions")); 123f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 124f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkelstatic cl::opt<bool> 125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, 126de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize 
loads and stores")); 127de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 128de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelAlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden, 130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Only generate aligned loads and stores")); 131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 133edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal FinkelNoMemOpBoost("bb-vectorize-no-mem-op-boost", 134edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel cl::init(false), cl::Hidden, 135edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel cl::desc("Don't boost the chain-depth contribution of loads and stores")); 136edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel 137edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkelstatic cl::opt<bool> 138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelFastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden, 139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Use a fast instruction dependency analysis")); 140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#ifndef NDEBUG 142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugInstructionExamination("bb-vectorize-debug-instruction-examination", 144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " instruction-examination process")); 147de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 148de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCandidateSelection("bb-vectorize-debug-candidate-selection", 
149de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 150de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " candidate-selection process")); 152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugPairSelection("bb-vectorize-debug-pair-selection", 154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " pair-selection process")); 157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCycleCheck("bb-vectorize-debug-cycle-check", 159de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " cycle-checking process")); 162de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#endif 163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 164de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSTATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize"); 165de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 166de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelnamespace { 167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel struct BBVectorize : public BasicBlockPass { 168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel static char ID; // Pass identification, replacement for typeid 169bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 170940371bc65570ec0add1ede4f4d9f0a41ba25e09Hongbin Zheng const VectorizeConfig Config; 171bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 
172bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize(const VectorizeConfig &C = VectorizeConfig()) 173bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng : BasicBlockPass(ID), Config(C) { 174de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel initializeBBVectorizePass(*PassRegistry::getPassRegistry()); 175de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 176de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 177bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize(Pass *P, const VectorizeConfig &C) 178bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng : BasicBlockPass(ID), Config(C) { 17987825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng AA = &P->getAnalysis<AliasAnalysis>(); 18087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng SE = &P->getAnalysis<ScalarEvolution>(); 18187825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng TD = P->getAnalysisIfAvailable<TargetData>(); 18287825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng } 18387825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 184de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<Value *, Value *> ValuePair; 185de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<ValuePair, size_t> ValuePairWithDepth; 186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair 187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<std::multimap<Value *, Value *>::iterator, 188de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *>::iterator> VPIteratorPair; 189de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator, 190de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair>::iterator> 191de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair; 192de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 
AliasAnalysis *AA; 194de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ScalarEvolution *SE; 195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel TargetData *TD; 196de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 197de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: const correct? 198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 19964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false); 200de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2015d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool getCandidatePairs(BasicBlock &BB, 2025d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator &Start, 203de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 20464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Value *> &PairableInsts, bool NonPow2Len); 205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 206de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs, 207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs); 209de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void buildDepMap(BasicBlock &BB, 211de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 212de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers); 214de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 215de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void choosePairs(std::multimap<Value *, Value *> &CandidatePairs, 216de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 
217de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 218de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 219de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>& ChosenPairs); 220de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 221de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void fuseChosenPairs(BasicBlock &BB, 222de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 223de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>& ChosenPairs); 224de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 225de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); 226de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 227de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool areInstsCompatible(Instruction *I, Instruction *J, 22864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool IsSimpleLoadStore, bool NonPow2Len); 229de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 230de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool trackUsesOfI(DenseSet<Value *> &Users, 231de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker &WriteSet, Instruction *I, 232de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, bool UpdateUsers = true, 233de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> *LoadMoveSet = 0); 2341230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 235de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void computePairsConnectedTo( 236de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 237de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 238de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 
239de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePair P); 240de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 241de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool pairsConflict(ValuePair P, ValuePair Q, 242de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 243de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0); 244de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 245de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool pairWillFormCycle(ValuePair P, 246de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUsers, 247de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &CurrentPairs); 248de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void pruneTreeFor( 250de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 252de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 254de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 255de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 256de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, 257de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PrunedTree, ValuePair J, 258de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck); 259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 260de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void buildInitialTreeFor( 261de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> 
&CandidatePairs, 262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 265de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 266de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, ValuePair J); 267de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 268de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void findBestTreeFor( 269de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 270de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 271de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 272de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 273de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 274de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 275de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, 276de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t &BestEffSize, VPIteratorPair ChoiceRange, 277de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck); 278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, 280282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel Instruction *J, unsigned o, bool FlipMemInputs); 281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 282de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void fillNewShuffleMask(LLVMContext& Context, Instruction *J, 
28364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaskOffset, unsigned NumInElem, 28464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElem1, unsigned IdxOffset, 28564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> &Mask); 286de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 287de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I, 288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J); 289de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 29064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J, 29164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned o, Value *&LOp, unsigned numElemL, 29264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeL, Type *ArgTypeR, 29364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned IdxOff = 0); 29464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 295de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementInput(LLVMContext& Context, Instruction *I, 296de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, unsigned o, bool FlipMemInputs); 297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 298de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, 299de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, SmallVector<Value *, 3> &ReplacedOperands, 300282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel bool FlipMemInputs); 301de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, 303de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, Instruction *K, 304de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, Instruction *&K1, 
305282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel Instruction *&K2, bool FlipMemInputs); 306de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 307de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void collectPairLoadMoveSet(BasicBlock &BB, 308de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 309de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 310de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I); 311de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 312de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void collectLoadMoveSet(BasicBlock &BB, 313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 314de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 315de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet); 316de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 317282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel void collectPtrInfo(std::vector<Value *> &PairableInsts, 318282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel DenseMap<Value *, Value *> &ChosenPairs, 319282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel DenseSet<Value *> &LowPtrInsts); 320282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 321de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool canMoveUsesOfIAfterJ(BasicBlock &BB, 322de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 323de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J); 324de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 325de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void moveUsesOfIAfterJ(BasicBlock &BB, 326de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 327de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 
328de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J); 329de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 330ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel void combineMetadata(Instruction *K, const Instruction *J); 331ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 33287825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng bool vectorizeBB(BasicBlock &BB) { 333de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool changed = false; 334de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate a sufficient number of times to merge types of size 1 bit, 335de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // then 2 bits, then 4, etc. up to half of the target vector width of the 336de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // target vector register. 33764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned n = 1; 33864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 2; 339bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter); 340de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel v *= 2, ++n) { 341bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng DEBUG(dbgs() << "BBV: fusing loop #" << n << 342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " for " << BB.getName() << " in " << 343de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BB.getParent()->getName() << "...\n"); 344de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (vectorizePairs(BB)) 345de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel changed = true; 346de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 348de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 349de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 35064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (changed && !Pow2LenOnly) { 35164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ++n; 
35264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; !Config.MaxIter || n <= Config.MaxIter; ++n) { 35364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " << 35464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel n << " for " << BB.getName() << " in " << 35564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel BB.getParent()->getName() << "...\n"); 35664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!vectorizePairs(BB, true)) break; 35764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 35864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 35964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 360de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: done!\n"); 361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return changed; 362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 36487825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng virtual bool runOnBasicBlock(BasicBlock &BB) { 36587825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng AA = &getAnalysis<AliasAnalysis>(); 36687825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng SE = &getAnalysis<ScalarEvolution>(); 36787825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng TD = getAnalysisIfAvailable<TargetData>(); 36887825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 36987825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng return vectorizeBB(BB); 37087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng } 37187825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 372de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel virtual void getAnalysisUsage(AnalysisUsage &AU) const { 373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlockPass::getAnalysisUsage(AU); 374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addRequired<AliasAnalysis>(); 375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addRequired<ScalarEvolution>(); 
376de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addPreserved<AliasAnalysis>(); 377de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addPreserved<ScalarEvolution>(); 3787e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel AU.setPreservesCFG(); 379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 380de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 38164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) { 38264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() && 38364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel "Cannot form vector from incompatible scalar types"); 38464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *STy = ElemTy->getScalarType(); 38564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 38664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem; 387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) { 38864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem = VTy->getNumElements(); 38964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 39064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem = 1; 39164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 39264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 39364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) { 39464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem += VTy->getNumElements(); 39564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 39664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem += 1; 397de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 3987e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel 39964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return VectorType::get(STy, numElem); 40064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 
40164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 40264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel static inline void getInstructionTypes(Instruction *I, 40364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *&T1, Type *&T2) { 40464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (isa<StoreInst>(I)) { 40564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // For stores, it is the value type, not the pointer type that matters 40664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // because the value is what will come from a vector register. 40764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 40864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *IVal = cast<StoreInst>(I)->getValueOperand(); 40964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T1 = IVal->getType(); 41064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 41164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T1 = I->getType(); 41264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 41364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 41464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I->isCast()) 41564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T2 = cast<CastInst>(I)->getSrcTy(); 41664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 41764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T2 = T1; 418de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 419de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 420de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the weight associated with the provided value. A chain of 421de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // candidate pairs has a length given by the sum of the weights of its 422de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // members (one weight per pair; the weight of each member of the pair 423de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is assumed to be the same). 
This length is then compared to the 424de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // chain-length threshold to determine if a given chain is significant 425de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // enough to be vectorized. The length is also used in comparing 426de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // candidate chains where longer chains are considered to be better. 427de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: when this function returns 0, the resulting instructions are 428de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // not actually fused. 429bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng inline size_t getDepthFactor(Value *V) { 430de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // InsertElement and ExtractElement have a depth factor of zero. This is 431de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // for two reasons: First, they cannot be usefully fused. Second, because 432de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the pass generates a lot of these, they can confuse the simple metric 433de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // used to compare the trees in the next iteration. Thus, giving them a 434de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // weight of zero allows the pass to essentially ignore them in 435de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // subsequent iterations when looking for vectorization opportunities 436de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // while still tracking dependency chains that flow through those 437de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instructions. 
438de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V)) 439de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return 0; 440de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 441edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel // Give a load or store half of the required depth so that load/store 442edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel // pairs will vectorize. 443bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V))) 444bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng return Config.ReqChainDepth/2; 445edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel 446de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return 1; 447de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 448de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 449de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This determines the relative offset of two loads or stores, returning 450de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // true if the offset could be determined to be some constant value. 451de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For example, if OffsetInElmts == 1, then J accesses the memory directly 452de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // after I; if OffsetInElmts == -1 then I accesses the memory 45364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // directly after J. 
454de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool getPairPtrInfo(Instruction *I, Instruction *J, 455de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment, 456de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t &OffsetInElmts) { 457de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts = 0; 458de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<LoadInst>(I)) { 459de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IPtr = cast<LoadInst>(I)->getPointerOperand(); 460de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel JPtr = cast<LoadInst>(J)->getPointerOperand(); 461de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IAlignment = cast<LoadInst>(I)->getAlignment(); 462de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel JAlignment = cast<LoadInst>(J)->getAlignment(); 463de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 464de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IPtr = cast<StoreInst>(I)->getPointerOperand(); 465de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel JPtr = cast<StoreInst>(J)->getPointerOperand(); 466de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IAlignment = cast<StoreInst>(I)->getAlignment(); 467de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel JAlignment = cast<StoreInst>(J)->getAlignment(); 468de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 469de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 470de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *IPtrSCEV = SE->getSCEV(IPtr); 471de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *JPtrSCEV = SE->getSCEV(JPtr); 472de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 473de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If this is a trivial offset, then we'll get something like 474de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // 1*sizeof(type). 
With target data, which we need anyway, this will get 475de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // constant folded into a number. 476de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV); 477de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (const SCEVConstant *ConstOffSCEV = 478de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dyn_cast<SCEVConstant>(OffsetSCEV)) { 479de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantInt *IntOff = ConstOffSCEV->getValue(); 480de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t Offset = IntOff->getSExtValue(); 481de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 482de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VTy = cast<PointerType>(IPtr->getType())->getElementType(); 483de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy); 484de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 48564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType(); 48664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (VTy != VTy2 && Offset < 0) { 48764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2); 48864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel OffsetInElmts = Offset/VTy2TSS; 48964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return (abs64(Offset) % VTy2TSS) == 0; 49064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 491de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 492de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts = Offset/VTyTSS; 493de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (abs64(Offset) % VTyTSS) == 0; 494de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 495de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 496de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 497de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 
498de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 499de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if the provided CallInst represents an intrinsic that can 500de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // be vectorized. 501de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isVectorizableIntrinsic(CallInst* I) { 502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Function *F = I->getCalledFunction(); 503de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!F) return false; 504de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 505de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IID = F->getIntrinsicID(); 506de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!IID) return false; 507de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 508de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel switch(IID) { 509de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel default: 510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 511de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::sqrt: 512de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::powi: 513de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::sin: 514de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::cos: 515de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log: 516de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log2: 517de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log10: 518de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::exp: 519de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::exp2: 520de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::pow: 52186312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng return Config.VectorizeMath; 522de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::fma: 52386312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng return Config.VectorizeFMA; 
524de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 525de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 526de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 527de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if J is the second element in some pair referenced by 528de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // some multimap pair iterator pair. 529de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel template <typename V> 530de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isSecondInIteratorPair(V J, std::pair< 531de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typename std::multimap<V, V>::iterator, 532de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typename std::multimap<V, V>::iterator> PairRange) { 533de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (typename std::multimap<V, V>::iterator K = PairRange.first; 534de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K != PairRange.second; ++K) 535de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (K->second == J) return true; 536de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 537de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 538de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 539de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel }; 540de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 541de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function implements one vectorization iteration on the provided 542de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // basic block. It returns true if the block is changed. 
54364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) { 5445d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool ShouldContinue; 5455d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator Start = BB.getFirstInsertionPt(); 5465d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 5475d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::vector<Value *> AllPairableInsts; 5485d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseMap<Value *, Value *> AllChosenPairs; 5495d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 5505d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel do { 5515d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::vector<Value *> PairableInsts; 5525d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::multimap<Value *, Value *> CandidatePairs; 5535d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, 55464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel PairableInsts, NonPow2Len); 5555d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (PairableInsts.empty()) continue; 5563706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 5575d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Now we have a map of all of the pairable instructions and we need to 5585d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // select the best possible pairing. A good pairing is one such that the 5595d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // users of the pair are also paired. This defines a (directed) forest 5605d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // over the pairs such that two pairs are connected iff the second pair 5615d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // uses the first. 
5623706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 5635d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Note that it only matters that both members of the second pair use some 5645d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // element of the first pair (to allow for splatting). 5653706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 5665d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::multimap<ValuePair, ValuePair> ConnectedPairs; 5675d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs); 5685d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ConnectedPairs.empty()) continue; 5693706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 5705d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Build the pairable-instruction dependency map 5715d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseSet<ValuePair> PairableInstUsers; 5725d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers); 5733706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 57435564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // There is now a graph of the connected pairs. For each variable, pick 57535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // the pairing with the largest tree meeting the depth requirement on at 57635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // least one branch. Then select all pairings that are part of that tree 57735564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // and remove them from the list of available pairings and pairable 57835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // variables. 
5793706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 5805d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseMap<Value *, Value *> ChosenPairs; 5815d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel choosePairs(CandidatePairs, PairableInsts, ConnectedPairs, 5825d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel PairableInstUsers, ChosenPairs); 5833706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 5845d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ChosenPairs.empty()) continue; 5855d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(), 5865d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel PairableInsts.end()); 5875d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end()); 5885d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } while (ShouldContinue); 5895d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 5905d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (AllChosenPairs.empty()) return false; 5915d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel NumFusedOps += AllChosenPairs.size(); 5923706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 593de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A set of pairs has now been selected. It is now necessary to replace the 594de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // paired instructions with vector instructions. For this procedure each 59543ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop // operand must be replaced with a vector operand. This vector is formed 596de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // by using build_vector on the old operands. The replaced values are then 597de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // replaced with a vector_extract on the result. Subsequent optimization 598de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // passes should coalesce the build/extract combinations. 
5993706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 6005d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs); 60164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 60264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // It is important to cleanup here so that future iterations of this 60364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // function have less work to do. 6048e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo()); 605de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 606de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 607de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 608de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function returns true if the provided instruction is capable of being 609de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // fused into a vector instruction. This determination is based only on the 610de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // type and other attributes of the instruction. 
611de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::isInstVectorizable(Instruction *I, 612de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool &IsSimpleLoadStore) { 613de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = false; 614de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 615de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (CallInst *C = dyn_cast<CallInst>(I)) { 616de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isVectorizableIntrinsic(C)) 617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 618de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (LoadInst *L = dyn_cast<LoadInst>(I)) { 619de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Vectorize simple loads if possbile: 620de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = L->isSimple(); 62186312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!IsSimpleLoadStore || !Config.VectorizeMemOps) 622de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (StoreInst *S = dyn_cast<StoreInst>(I)) { 624de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Vectorize simple stores if possbile: 625de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = S->isSimple(); 62686312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!IsSimpleLoadStore || !Config.VectorizeMemOps) 627de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 628de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (CastInst *C = dyn_cast<CastInst>(I)) { 629de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // We can vectorize casts, but not casts of pointer types, etc. 
63086312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!Config.VectorizeCasts) 631de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 632de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 633de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *SrcTy = C->getSrcTy(); 634f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!SrcTy->isSingleValueType()) 635de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 636de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *DestTy = C->getDestTy(); 638f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!DestTy->isSingleValueType()) 639de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 640fc3665c87519850f629c9565535e3be447e10addHal Finkel } else if (isa<SelectInst>(I)) { 641fc3665c87519850f629c9565535e3be447e10addHal Finkel if (!Config.VectorizeSelect) 642fc3665c87519850f629c9565535e3be447e10addHal Finkel return false; 643e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel } else if (isa<CmpInst>(I)) { 644e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel if (!Config.VectorizeCmp) 645e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel return false; 646f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) { 647f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!Config.VectorizeGEP) 648f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 649f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 650f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel // Currently, vector GEPs exist only with one index. 
651f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (G->getNumIndices() != 1) 652f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 653de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) || 654de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) { 655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // We can't vectorize memory operations without target data 659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (TD == 0 && IsSimpleLoadStore) 660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 661de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 662de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *T1, *T2; 66364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(I, T1, T2); 664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Not every type can be vectorized... 
666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) || 667de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel !(VectorType::isValidElementType(T2) || T2->isVectorTy())) 668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 669de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 670768edf3cd037aab10391abc279f71470df8e3156Hal Finkel if (T1->getScalarSizeInBits() == 1 && T2->getScalarSizeInBits() == 1) { 671768edf3cd037aab10391abc279f71470df8e3156Hal Finkel if (!Config.VectorizeBools) 672768edf3cd037aab10391abc279f71470df8e3156Hal Finkel return false; 673768edf3cd037aab10391abc279f71470df8e3156Hal Finkel } else { 674768edf3cd037aab10391abc279f71470df8e3156Hal Finkel if (!Config.VectorizeInts 675768edf3cd037aab10391abc279f71470df8e3156Hal Finkel && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) 676768edf3cd037aab10391abc279f71470df8e3156Hal Finkel return false; 677768edf3cd037aab10391abc279f71470df8e3156Hal Finkel } 678768edf3cd037aab10391abc279f71470df8e3156Hal Finkel 67986312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!Config.VectorizeFloats 68086312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) 681de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 682de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 683e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel // Don't vectorize target-specific types. 
684e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel if (T1->isX86_FP80Ty() || T1->isPPC_FP128Ty() || T1->isX86_MMXTy()) 685e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel return false; 686e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy()) 687e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel return false; 688e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel 68905bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel if ((!Config.VectorizePointers || TD == 0) && 69005bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel (T1->getScalarType()->isPointerTy() || 69105bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel T2->getScalarType()->isPointerTy())) 692f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 693f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 69464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (T1->getPrimitiveSizeInBits() >= Config.VectorBits || 69564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T2->getPrimitiveSizeInBits() >= Config.VectorBits) 696de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 697de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 698de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 699de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 700de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 701de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function returns true if the two provided instructions are compatible 702de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // (meaning that they can be fused into a vector instruction). This assumes 703de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that I has already been determined to be vectorizable and that J is not 704de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // in the use tree of I. 
705de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, 70664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool IsSimpleLoadStore, bool NonPow2Len) { 707de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << 708de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << "\n"); 709de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 710de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Loads and stores can be merged if they have different alignments, 711de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but are otherwise the same. 71264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment | 71364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel (NonPow2Len ? Instruction::CompareUsingScalarTypes : 0))) 71464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return false; 71564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 71664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *IT1, *IT2, *JT1, *JT2; 71764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(I, IT1, IT2); 71864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(J, JT1, JT2); 71964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaxTypeBits = std::max( 72064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(), 72164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits()); 72264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (MaxTypeBits > Config.VectorBits) 723de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 724ec4e85e3364f50802f2007e4b1e23661d4610366Hal Finkel 725de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: handle addsub-type operations! 
726de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 727de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsSimpleLoadStore) { 728de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *IPtr, *JPtr; 729de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IAlignment, JAlignment; 730de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t OffsetInElmts = 0; 731de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, 732de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts) && abs64(OffsetInElmts) == 1) { 733bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.AlignedOnly) { 73464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *aTypeI = isa<StoreInst>(I) ? 735de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cast<StoreInst>(I)->getValueOperand()->getType() : I->getType(); 73664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *aTypeJ = isa<StoreInst>(J) ? 73764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<StoreInst>(J)->getValueOperand()->getType() : J->getType(); 73864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 739de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // An aligned load or store is possible only if the instruction 740de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // with the lower offset has an alignment suitable for the 741de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vector type. 
7421230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 743de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned BottomAlignment = IAlignment; 744de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (OffsetInElmts < 0) BottomAlignment = JAlignment; 7451230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 74664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VType = getVecTypeForPair(aTypeI, aTypeJ); 747de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned VecAlignment = TD->getPrefTypeAlignment(VType); 748de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (BottomAlignment < VecAlignment) 749de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 750de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 751de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 752de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 753de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 754de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 755de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 7566173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // The powi intrinsic is special because only the first argument is 7576173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // vectorized, the second arguments must be equal. 
7586173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel CallInst *CI = dyn_cast<CallInst>(I); 7596173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Function *FI; 7606173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel if (CI && (FI = CI->getCalledFunction()) && 7616173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel FI->getIntrinsicID() == Intrinsic::powi) { 7626173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel 7636173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Value *A1I = CI->getArgOperand(1), 7646173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel *A1J = cast<CallInst>(J)->getArgOperand(1); 7656173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel const SCEV *A1ISCEV = SE->getSCEV(A1I), 7666173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel *A1JSCEV = SE->getSCEV(A1J); 7676173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel return (A1ISCEV == A1JSCEV); 7686173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } 7696173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel 770de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 771de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 772de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 773de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Figure out whether or not J uses I and update the users and write-set 774de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // structures associated with I. Specifically, Users represents the set of 775de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instructions that depend on I. WriteSet represents the set 776de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // of memory locations that are dependent on I. If UpdateUsers is true, 777de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // and J uses I, then Users is updated to contain J and WriteSet is updated 778de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to contain any memory locations to which J writes. The function returns 779de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // true if J uses I. 
By default, alias analysis is used to determine 780de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // whether J reads from memory that overlaps with a location in WriteSet. 781de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If LoadMoveSet is not null, then it is a previously-computed multimap 782de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // where the key is the memory-based user instruction and the value is 783de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the instruction to be compared with I. So, if LoadMoveSet is provided, 784de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // then the alias analysis is not used. This is necessary because this 785de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // function is called during the process of moving instructions during 786de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vectorization and the results of the alias analysis are not stable during 787de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that process. 788de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users, 789de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker &WriteSet, Instruction *I, 790de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, bool UpdateUsers, 791de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> *LoadMoveSet) { 792de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UsesI = false; 793de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 794de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This instruction may already be marked as a user due, for example, to 795de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // being a member of a selected pair. 
796de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (Users.count(J)) 797de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = true; 798de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 799de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!UsesI) 8007e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel for (User::op_iterator JU = J->op_begin(), JE = J->op_end(); 8017e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel JU != JE; ++JU) { 802de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *V = *JU; 803de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I == V || Users.count(V)) { 804de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = true; 805de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 806de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 807de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 808de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!UsesI && J->mayReadFromMemory()) { 809de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (LoadMoveSet) { 810de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair JPairRange = LoadMoveSet->equal_range(J); 811de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = isSecondInIteratorPair<Value*>(I, JPairRange); 812de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 813de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (AliasSetTracker::iterator W = WriteSet.begin(), 814de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel WE = WriteSet.end(); W != WE; ++W) { 81538a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel if (W->aliasesUnknownInst(J, *AA)) { 81638a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel UsesI = true; 81738a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel break; 818de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 819de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 820de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 821de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 822de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 
823de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI && UpdateUsers) { 824de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (J->mayWriteToMemory()) WriteSet.add(J); 825de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Users.insert(J); 826de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 827de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 828de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return UsesI; 829de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 830de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 831de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function iterates over all instruction pairs in the provided 832de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // basic block and collects all candidate pairs for vectorization. 8335d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool BBVectorize::getCandidatePairs(BasicBlock &BB, 8345d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator &Start, 835de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 83664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Value *> &PairableInsts, bool NonPow2Len) { 837de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock::iterator E = BB.end(); 8385d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (Start == E) return false; 8395d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 8405d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool ShouldContinue = false, IAfterStart = false; 8415d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel for (BasicBlock::iterator I = Start++; I != E; ++I) { 8425d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (I == Start) IAfterStart = true; 8435d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 844de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool IsSimpleLoadStore; 845de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isInstVectorizable(I, IsSimpleLoadStore)) continue; 
846de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 847de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for an instruction with which to pair instruction *I... 848de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 849de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 8505d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool JAfterStart = IAfterStart; 8515d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator J = llvm::next(I); 852bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) { 8535d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (J == Start) JAfterStart = true; 8545d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 855de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Determine if J uses I, if so, exit the loop. 856bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep); 857bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.FastDep) { 858de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: For this heuristic to be effective, independent operations 859de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // must tend to be intermixed. This is likely to be true from some 860de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // kinds of grouped loop unrolling (but not the generic LLVM pass), 861de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but otherwise may require some kind of reordering pass. 
862de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 863de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // When using fast dependency analysis, 864de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // stop searching after first use: 865de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI) break; 866de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 867de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI) continue; 868de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 869de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 870de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // J does not use I, and comes before the first use of I, so it can be 871de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // merged with I if the instructions are compatible. 87264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len)) continue; 873de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 874de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // J is a candidate for merging with I. 875de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!PairableInsts.size() || 876de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInsts[PairableInsts.size()-1] != I) { 877de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInsts.push_back(I); 878de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 8795d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 880de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.insert(ValuePair(I, J)); 8815d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 8825d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // The next call to this function must start after the last instruction 8835d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // selected during this invocation. 
8845d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (JAfterStart) { 8855d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel Start = llvm::next(J); 8865d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel IAfterStart = JAfterStart = false; 8875d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } 8885d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 889de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair " 890de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *I << " <-> " << *J << "\n"); 8915d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 8925d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // If we have already found too many pairs, break here and this function 8935d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // will be called again starting after the last instruction selected 8945d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // during this invocation. 895bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (PairableInsts.size() >= Config.MaxInsts) { 8965d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel ShouldContinue = true; 8975d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel break; 8985d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } 899de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 9005d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 9015d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ShouldContinue) 9025d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel break; 903de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 904de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 905de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: found " << PairableInsts.size() 906de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << " instructions with candidate pairs\n"); 9075d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 9085d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel return ShouldContinue; 909de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 
910de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 911de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Finds candidate pairs connected to the pair P = <PI, PJ>. This means that 912de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // it looks for pairs such that both members have an input which is an 913de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // output of PI or PJ. 914de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::computePairsConnectedTo( 915de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 916de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 917de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 918de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePair P) { 919bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel StoreInst *SI, *SJ; 920bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 921de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For each possible pairing for this variable, look at the uses of 922de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the first value... 923de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator I = P.first->use_begin(), 924de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = P.first->use_end(); I != E; ++I) { 925bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if (isa<LoadInst>(*I)) { 926bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // A pair cannot be connected to a load because the load only takes one 927bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // operand (the address) and it is a scalar even after vectorization. 
928bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 929bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel } else if ((SI = dyn_cast<StoreInst>(*I)) && 930bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.first == SI->getPointerOperand()) { 931bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // Similarly, a pair cannot be connected to a store through its 932bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // pointer operand. 933bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 934bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel } 935bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 936de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); 937de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 938de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For each use of the first variable, look for uses of the second 939de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // variable... 940de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.second->use_begin(), 941de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = P.second->use_end(); J != E2; ++J) { 942bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 943bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SJ->getPointerOperand()) 944bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 945bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 946de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair JPairRange = CandidatePairs.equal_range(*J); 947de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 948de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for <I, J>: 949de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isSecondInIteratorPair<Value*>(*J, IPairRange)) 950de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); 951de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 
952de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for <J, I>: 953de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isSecondInIteratorPair<Value*>(*I, JPairRange)) 954de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I))); 955de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 956de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 957bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.SplatBreaksChain) continue; 958de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for cases where just the first value in the pair is used by 959de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both members of another pair (splatting). 960de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) { 961bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 962bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.first == SJ->getPointerOperand()) 963bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 964bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 965de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isSecondInIteratorPair<Value*>(*J, IPairRange)) 966de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); 967de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 968de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 969de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 970bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.SplatBreaksChain) return; 971de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for cases where just the second value in the pair is used by 972de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both members of another pair (splatting). 
973de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator I = P.second->use_begin(), 974de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = P.second->use_end(); I != E; ++I) { 975bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if (isa<LoadInst>(*I)) 976bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 977bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel else if ((SI = dyn_cast<StoreInst>(*I)) && 978bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SI->getPointerOperand()) 979bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 980bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 981de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); 982de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 983de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) { 984bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 985bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SJ->getPointerOperand()) 986bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 987bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 988de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isSecondInIteratorPair<Value*>(*J, IPairRange)) 989de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); 990de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 991de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 992de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 993de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 994de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function figures out which pairs are connected. 
Two pairs are 995de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // connected if some output of the first pair forms an input to both members 996de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // of the second pair. 997de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::computeConnectedPairs( 998de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 999de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1000de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs) { 1001de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1002de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator PI = PairableInsts.begin(), 1003de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PE = PairableInsts.end(); PI != PE; ++PI) { 1004de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI); 1005de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1006de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator P = choiceRange.first; 1007de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel P != choiceRange.second; ++P) 1008de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel computePairsConnectedTo(CandidatePairs, PairableInsts, 1009de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConnectedPairs, *P); 1010de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1011de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1012de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size() 1013de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << " pair connections.\n"); 1014de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1015de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1016de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function builds a set of use tuples such that <A, B> is 
in the set 1017de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // if B is in the use tree of A. If B is in the use tree of A, then B 1018de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // depends on the output of A. 1019de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::buildDepMap( 1020de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock &BB, 1021de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1022de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1023de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers) { 1024de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> IsInPair; 1025de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(), 1026de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = CandidatePairs.end(); C != E; ++C) { 1027de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsInPair.insert(C->first); 1028de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsInPair.insert(C->second); 1029de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1030de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1031de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate through the basic block, recording all Users of each 1032de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pairable instruction. 
1033de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1034de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock::iterator E = BB.end(); 1035de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { 1036de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsInPair.find(I) == IsInPair.end()) continue; 1037de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1038de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 1039de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 1040de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) 1041de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) trackUsesOfI(Users, WriteSet, I, J); 1042de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1043de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end(); 1044de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel U != E; ++U) 1045de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.insert(ValuePair(I, *U)); 1046de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1047de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1048de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1049de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if an input to pair P is an output of pair Q and also an 1050de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // input of pair Q is an output of pair P. If this is the case, then these 1051de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // two pairs cannot be simultaneously fused. 
1052de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q, 1053de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1054de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> *PairableInstUserMap) { 1055de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Two pairs are in conflict if they are mutual Users of eachother. 1056de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) || 1057de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.first, Q.second)) || 1058de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.second, Q.first)) || 1059de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.second, Q.second)); 1060de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) || 1061de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.first, P.second)) || 1062de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.second, P.first)) || 1063de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.second, P.second)); 1064de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (PairableInstUserMap) { 1065de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: The expensive part of the cycle check is not so much the cycle 1066de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // check itself but this edge insertion procedure. This needs some 1067de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // profiling and probably a different data structure (same is true of 1068de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // most uses of std::multimap). 
1069de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (PUsesQ) { 1070de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair QPairRange = PairableInstUserMap->equal_range(Q); 1071de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isSecondInIteratorPair(P, QPairRange)) 1072de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUserMap->insert(VPPair(Q, P)); 1073de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1074de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (QUsesP) { 1075de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair PPairRange = PairableInstUserMap->equal_range(P); 1076de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isSecondInIteratorPair(Q, PPairRange)) 1077de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUserMap->insert(VPPair(P, Q)); 1078de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1079de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1080de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1081de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (QUsesP && PUsesQ); 1082de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1083de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1084de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function walks the use graph of current pairs to see if, starting 1085de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // from P, the walk returns to P. 
1086de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::pairWillFormCycle(ValuePair P, 1087de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 1088de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &CurrentPairs) { 1089de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCycleCheck) 1090de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> " 1091de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *P.second << "\n"); 1092de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A lookup table of visisted pairs is kept because the PairableInstUserMap 1093de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // contains non-direct associations. 1094de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> Visited; 109535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePair, 32> Q; 1096de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1097de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(P); 109835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 109935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel ValuePair QTop = Q.pop_back_val(); 1100de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Visited.insert(QTop); 1101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCycleCheck) 1103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> " 1104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *QTop.second << "\n"); 1105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop); 1106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first; 
1107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C != QPairRange.second; ++C) { 1108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C->second == P) { 1109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() 1110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << "BBV: rejected to prevent non-trivial cycle formation: " 1111de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *C->first.first << " <-> " << *C->first.second << "\n"); 1112de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 1113de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1114de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 11150b2500c504156c45cd71817a9ef6749b6cde5703David Blaikie if (CurrentPairs.count(C->second) && !Visited.count(C->second)) 1116de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(C->second); 1117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 111835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1119de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 1121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function builds the initial tree of connected pairs with the 1124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair J at the root. 
1125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::buildInitialTreeFor( 1126de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1127de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1128de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 1129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, ValuePair J) { 1132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Each of these pairs is viewed as the root node of a Tree. The Tree 1133de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is then walked (depth-first). As this happens, we keep track of 1134de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the pairs that compose the Tree and the maximum depth of the Tree. 
113535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePairWithDepth, 32> Q; 1136de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1137de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); 113835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 1139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePairWithDepth QTop = Q.back(); 1140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Push each child onto the queue: 1142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool MoreChildren = false; 1143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t MaxChildDepth = QTop.second; 1144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first); 1145478eed85f96f0d93da43e26cfb7fc6dee981c9aaNAKAMURA Takumi for (std::multimap<ValuePair, ValuePair>::iterator k = qtRange.first; 1146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel k != qtRange.second; ++k) { 1147de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Make sure that this child pair is still a candidate: 1148de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool IsStillCand = false; 1149de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair checkRange = 1150de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.equal_range(k->second.first); 1151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator m = checkRange.first; 1152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel m != checkRange.second; ++m) { 1153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (m->second == k->second.second) { 1154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsStillCand = true; 1155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 
1157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1159de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsStillCand) { 1160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t>::iterator C = Tree.find(k->second); 1161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C == Tree.end()) { 1162de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t d = getDepthFactor(k->second.first); 1163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(k->second, QTop.second+d)); 1164de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MoreChildren = true; 1165de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1166de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxChildDepth = std::max(MaxChildDepth, C->second); 1167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1169de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1170de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1171de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!MoreChildren) { 1172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Record the current pair as part of the Tree: 1173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth)); 1174de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.pop_back(); 1175de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 117635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1177de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1178de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1179de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Given some initial tree, prune it by removing conflicting pairs (pairs 1180de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that cannot be simultaneously chosen for vectorization). 
1181de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::pruneTreeFor( 1182de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1184de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 1185de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 1187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1188de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, 1189de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PrunedTree, ValuePair J, 1190de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck) { 119135564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePairWithDepth, 32> Q; 1192de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); 119435564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 119535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel ValuePairWithDepth QTop = Q.pop_back_val(); 1196de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PrunedTree.insert(QTop.first); 1197de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Visit each child, pruning as necessary... 
119943ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop DenseMap<ValuePair, size_t> BestChildren; 1200de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first); 1201478eed85f96f0d93da43e26cfb7fc6dee981c9aaNAKAMURA Takumi for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first; 1202de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K != QTopRange.second; ++K) { 1203de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t>::iterator C = Tree.find(K->second); 1204de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C == Tree.end()) continue; 1205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1206de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This child is in the Tree, now we need to make sure it is the 1207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // best of any conflicting children. There could be multiple 1208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflicting children, so first, determine if we're keeping 1209de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // this child, then delete conflicting children as necessary. 1210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1211de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // It is also necessary to guard against pairing-induced 1212de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // dependencies. Consider instructions a .. x .. y .. b 1213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // such that (a,b) are to be fused and (x,y) are to be fused 1214de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but a is an input to x and b is an output from y. This 1215de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // means that y cannot be moved after b but x must be moved 1216de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // after b for (a,b) to be fused. In other words, after 1217de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // fusing (a,b) we have y .. a/b .. 
x where y is an input 1218de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to a/b and x is an output to a/b: x and y can no longer 1219de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // be legally fused. To prevent this condition, we must 1220de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // make sure that a child pair added to the Tree is not 1221de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both an input and output of an already-selected pair. 1222de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1223de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Pairing-induced dependencies can also form from more complicated 1224de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // cycles. The pair vs. pair conflicts are easy to check, and so 1225de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that is done explicitly for "fast rejection", and because for 1226de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // child vs. child conflicts, we may prefer to keep the current 1227de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair in preference to the already-selected child. 
1228de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> CurrentPairs; 1229de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1230de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool CanAdd = true; 1231de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<ValuePair, size_t>::iterator C2 123243ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(), E2 = BestChildren.end(); 1233de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2 != E2; ++C2) { 1234de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1235de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1236de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1237de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1238de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers, 1239de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1240de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->second >= C->second) { 1241de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1242de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1243de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1244de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1245de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(C2->first); 1246de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1247de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1248de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1250de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Even worse, this child could conflict with another node already 1251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // selected for the Tree. If that is the case, ignore this child. 
1252de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<ValuePair>::iterator T = PrunedTree.begin(), 1253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = PrunedTree.end(); T != E2; ++T) { 1254de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (T->first == C->first.first || 1255de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->first == C->first.second || 1256de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->second == C->first.first || 1257de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->second == C->first.second || 1258de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(*T, C->first, PairableInstUsers, 1259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1260de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1261de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(*T); 1265de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1266de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1267de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1268de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // And check the queue too... 
126935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel for (SmallVector<ValuePairWithDepth, 32>::iterator C2 = Q.begin(), 1270de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = Q.end(); C2 != E2; ++C2) { 1271de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1272de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1273de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1274de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1275de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers, 1276de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1277de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(C2->first); 1282de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1283de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1284de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1285de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Last but not least, check for a conflict with any of the 1286de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // already-chosen pairs. 1287de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator C2 = 1288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.begin(), E2 = ChosenPairs.end(); 1289de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2 != E2; ++C2) { 1290de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (pairsConflict(*C2, C->first, PairableInstUsers, 1291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? 
&PairableInstUserMap : 0)) { 1292de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1293de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1294de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1295de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1296de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(*C2); 1297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1298de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1299de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 13001230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // To check for non-trivial cycles formed by the addition of the 13011230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // current pair we've formed a list of all relevant pairs, now use a 13021230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // graph walk to check for a cycle. We start from the current pair and 13031230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // walk the use tree to see if we again reach the current pair. If we 13041230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // do, then the current pair is rejected. 1305de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1306de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: It may be more efficient to use a topological-ordering 1307de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // algorithm to improve the cycle check. This should be investigated. 1308de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UseCycleCheck && 1309de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs)) 1310de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1311de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1312de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This child can be added, but we may have chosen it in preference 1313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to an already-selected child. 
Check for this here, and if a 1314de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflict is found, then remove the previously-selected child 1315de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // before adding this one in its place. 1316de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<ValuePair, size_t>::iterator C2 131743ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(); C2 != BestChildren.end();) { 1318de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1319de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1320de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1321de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1322de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers)) 132343ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop BestChildren.erase(C2++); 1324de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 1325de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++C2; 1326de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1327de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 132843ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop BestChildren.insert(ValuePairWithDepth(C->first, C->second)); 1329de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1330de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1331de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<ValuePair, size_t>::iterator C 133243ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(), E2 = BestChildren.end(); 1333de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C != E2; ++C) { 1334de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t DepthF = getDepthFactor(C->first.first); 1335de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(C->first, QTop.second+DepthF)); 
1336de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 133735564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1338de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1339de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1340de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function finds the best tree of mututally-compatible connected 1341de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pairs, given the choice of root pairs as an iterator range. 1342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::findBestTreeFor( 1343de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1344de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1345de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 1346de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 1348de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1349de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, 1350de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t &BestEffSize, VPIteratorPair ChoiceRange, 1351de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck) { 1352de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first; 1353de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel J != ChoiceRange.second; ++J) { 1354de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1355de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Before going any further, make sure that this pair does not 1356de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflict with any already-selected pairs (see comment below 
1357de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // near the Tree pruning for more details). 1358de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> ChosenPairSet; 1359de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool DoesConflict = false; 1360de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(), 1361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = ChosenPairs.end(); C != E; ++C) { 1362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (pairsConflict(*C, *J, PairableInstUsers, 1363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1364de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DoesConflict = true; 1365de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1366de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1368de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairSet.insert(*C); 1369de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1370de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (DoesConflict) continue; 1371de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1372de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UseCycleCheck && 1373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairWillFormCycle(*J, PairableInstUserMap, ChosenPairSet)) 1374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1376de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> Tree; 1377de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel buildInitialTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, 1378de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers, ChosenPairs, Tree, *J); 1379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1380de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Because we'll keep the child with the largest depth, 
the largest 1381de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // depth is still the same in the unpruned Tree. 1382de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t MaxDepth = Tree.lookup(*J); 1383de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1384de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {" 1385de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *J->first << " <-> " << *J->second << "} of depth " << 1386de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxDepth << " and size " << Tree.size() << "\n"); 1387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1388de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // At this point the Tree has been constructed, but, may contain 1389de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // contradictory children (meaning that different children of 1390de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // some tree node may be attempting to fuse the same instruction). 1391de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // So now we walk the tree again, in the case of a conflict, 1392de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // keep only the child with the largest depth. To break a tie, 1393de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // favor the first child. 
1394de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1395de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> PrunedTree; 1396de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, 1397de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree, 1398de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PrunedTree, *J, UseCycleCheck); 1399de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t EffSize = 0; 1401de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), 1402de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = PrunedTree.end(); S != E; ++S) 1403de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel EffSize += getDepthFactor(S->first); 1404de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1405de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugPairSelection) 1406de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: found pruned Tree for pair {" 1407de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *J->first << " <-> " << *J->second << "} of depth " << 1408de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxDepth << " and size " << PrunedTree.size() << 1409de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " (effective size: " << EffSize << ")\n"); 1410bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) { 1411de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestMaxDepth = MaxDepth; 1412de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestEffSize = EffSize; 1413de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestTree = PrunedTree; 1414de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1415de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1416de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 
1417de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1418de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Given the list of candidate pairs, this function selects those 1419de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that will be fused into vector instructions. 1420de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::choosePairs( 1421de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1422de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1423de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 1424de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1425de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>& ChosenPairs) { 1426bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng bool UseCycleCheck = 1427bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck; 1428de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> PairableInstUserMap; 1429de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator I = PairableInsts.begin(), 1430de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = PairableInsts.end(); I != E; ++I) { 1431de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The number of possible pairings for this variable: 1432de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t NumChoices = CandidatePairs.count(*I); 1433de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!NumChoices) continue; 1434de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1435de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I); 1436de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1437de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The best pair to choose and its tree: 
1438de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t BestMaxDepth = 0, BestEffSize = 0; 1439de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> BestTree; 1440de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel findBestTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, 1441de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers, PairableInstUserMap, ChosenPairs, 1442de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestTree, BestMaxDepth, BestEffSize, ChoiceRange, 1443de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck); 1444de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1445de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A tree has been chosen (or not) at this point. If no tree was 1446de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // chosen, then this instruction, I, cannot be paired (and is no longer 1447de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // considered). 1448de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1449de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (BestTree.size() > 0) 1450de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: selected pairs in the best tree for: " 1451de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *cast<Instruction>(*I) << "\n"); 1452de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1453de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<ValuePair>::iterator S = BestTree.begin(), 1454de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE2 = BestTree.end(); S != SE2; ++S) { 1455de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Insert the members of this tree into the list of chosen pairs. 
1456de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.insert(ValuePair(S->first, S->second)); 1457de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " << 1458de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel *S->second << "\n"); 1459de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1460de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Remove all candidate pairs that have values in the chosen tree. 1461de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator K = 1462de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.begin(); K != CandidatePairs.end();) { 1463de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (K->first == S->first || K->second == S->first || 1464de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->second == S->second || K->first == S->second) { 1465de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Don't remove the actual pair chosen so that it can be used 1466de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // in subsequent tree selections. 
1467de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!(K->first == S->first && K->second == S->second)) 1468de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.erase(K++); 1469de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 1470de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++K; 1471de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1472de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++K; 1473de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1474de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1475de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1476de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1477de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1478de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n"); 1479de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1480de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1481de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::string getReplacementName(Instruction *I, bool IsInput, unsigned o, 1482de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned n = 0) { 1483de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!I->hasName()) 1484de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return ""; 1485de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1486de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) + 1487de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (n > 0 ? "." + utostr(n) : "")).str(); 1488de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1489de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1490de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value that is to be used as the pointer input to the vector 1491de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instruction that fuses I with J. 
1492de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, 1493de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J, unsigned o, 1494282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel bool FlipMemInputs) { 1495de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *IPtr, *JPtr; 1496de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IAlignment, JAlignment; 1497de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t OffsetInElmts; 1498282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 1499282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel // Note: the analysis might fail here, that is why FlipMemInputs has 1500282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel // been precomputed (OffsetInElmts must be unused here). 1501de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, 1502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts); 1503de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1504de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The pointer value is taken to be the one with the lowest offset. 
1505de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *VPtr; 1506282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel if (!FlipMemInputs) { 1507de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPtr = IPtr; 1508de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1509de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPtr = JPtr; 1510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1511de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 151264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType(); 151364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType(); 151464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 1515de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VArgPtrType = PointerType::get(VArgType, 1516de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cast<PointerType>(IPtr->getType())->getAddressSpace()); 1517de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o), 1518de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel /* insert before */ FlipMemInputs ? 
J : I); 1519de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1520de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1521de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J, 152264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaskOffset, unsigned NumInElem, 152364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElem1, unsigned IdxOffset, 152464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> &Mask) { 152564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements(); 152664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < NumElem1; ++v) { 1527de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int m = cast<ShuffleVectorInst>(J)->getMaskValue(v); 1528de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (m < 0) { 1529de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context)); 1530de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1531de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned mm = m + (int) IdxOffset; 153264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (m >= (int) NumInElem1) 1533de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel mm += (int) NumInElem; 1534de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1535de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[v+MaskOffset] = 1536de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantInt::get(Type::getInt32Ty(Context), mm); 1537de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1538de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1539de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1540de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1541de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value that is to be used as the vector-shuffle mask to the 1542de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal 
Finkel // vector instruction that fuses I with J. 1543de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context, 1544de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 1545de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This is the shuffle mask. We need to append the second 1546de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // mask to the first, and the numbers need to be adjusted. 1547de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 154864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getType(); 154964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getType(); 155064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 155164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 155264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements(); 1553de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1554de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Get the total number of elements in the fused vector type. 1555de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // By definition, this must equal the number of elements in 1556de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the final mask. 
1557de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumElem = cast<VectorType>(VArgType)->getNumElements(); 1558de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Constant*> Mask(NumElem); 1559de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 156064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *OpTypeI = I->getOperand(0)->getType(); 156164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements(); 156264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *OpTypeJ = J->getOperand(0)->getType(); 156364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements(); 156464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 156564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // The fused vector will be: 156664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // ----------------------------------------------------- 156764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ | 156864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // ----------------------------------------------------- 156964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // from which we'll extract NumElem total elements (where the first NumElemI 157064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // of them come from the mask in I and the remainder come from the mask 157164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // in J. 1572de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1573de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For the mask from the first pair... 157464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI, 157564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 0, Mask); 1576de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1577de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For the mask from the second pair... 
157864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ, 157964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NumInElemI, Mask); 1580de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1581de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return ConstantVector::get(Mask); 1582de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1583de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 158464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I, 158564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *J, unsigned o, Value *&LOp, 158664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemL, 158764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeL, Type *ArgTypeH, 158864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned IdxOff) { 158964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool ExpandedIEChain = false; 159064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) { 159164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // If we have a pure insertelement chain, then this can be rewritten 159264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // into a chain that directly builds the larger type. 
159364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool PureChain = true; 159464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel InsertElementInst *LIENext = LIE; 159564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel do { 159664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!isa<UndefValue>(LIENext->getOperand(0)) && 159764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel !isa<InsertElementInst>(LIENext->getOperand(0))) { 159864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel PureChain = false; 159964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel break; 160064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 160164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } while ((LIENext = 160264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel dyn_cast<InsertElementInst>(LIENext->getOperand(0)))); 160364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 160464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (PureChain) { 160564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel SmallVector<Value *, 8> VectElemts(numElemL, 160664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel UndefValue::get(ArgTypeL->getScalarType())); 160764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel InsertElementInst *LIENext = LIE; 160864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel do { 160964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned Idx = 161064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(LIENext->getOperand(2))->getSExtValue(); 161164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectElemts[Idx] = LIENext->getOperand(1); 161264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } while ((LIENext = 161364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel dyn_cast<InsertElementInst>(LIENext->getOperand(0)))); 161464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 161564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIENext = 0; 161664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *LIEPrev = UndefValue::get(ArgTypeH); 
161764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemL; ++i) { 161864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (isa<UndefValue>(VectElemts[i])) continue; 161964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i], 162064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantInt::get(Type::getInt32Ty(Context), 162164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel i + IdxOff), 162264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o, i+1)); 162364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIENext->insertBefore(J); 162464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIEPrev = LIENext; 162564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 162664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 162764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH); 162864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExpandedIEChain = true; 162964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 163064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 163164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 163264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return ExpandedIEChain; 163364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 163464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 1635de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value to be used as the specified operand of the vector 1636de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instruction that fuses I with J. 
1637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I, 1638de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, unsigned o, bool FlipMemInputs) { 1639de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); 1640de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); 1641de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 164264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // Compute the fused vector type for this operand 164364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getOperand(o)->getType(); 164464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getOperand(o)->getType(); 164564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 1646de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1647de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *L = I, *H = J; 164864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ; 1649de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (FlipMemInputs) { 1650de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel L = J; 1651de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel H = I; 165264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ArgTypeL = ArgTypeJ; 165364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ArgTypeH = ArgTypeI; 1654de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 165664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemL; 165764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgTypeL->isVectorTy()) 165864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL = cast<VectorType>(ArgTypeL)->getNumElements(); 165964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 
166064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL = 1; 1661de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 166264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemH; 166364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgTypeH->isVectorTy()) 166464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemH = cast<VectorType>(ArgTypeH)->getNumElements(); 166564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 166664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemH = 1; 166764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 166864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *LOp = L->getOperand(o); 166964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *HOp = H->getOperand(o); 167064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = VArgType->getNumElements(); 167164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 167264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // First, we check if we can reuse the "original" vector outputs (if these 167364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // exist). We might need a shuffle. 167464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExtractElementInst *LEE = dyn_cast<ExtractElementInst>(LOp); 167564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExtractElementInst *HEE = dyn_cast<ExtractElementInst>(HOp); 167664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ShuffleVectorInst *LSV = dyn_cast<ShuffleVectorInst>(LOp); 167764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ShuffleVectorInst *HSV = dyn_cast<ShuffleVectorInst>(HOp); 167864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 167964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // FIXME: If we're fusing shuffle instructions, then we can't apply this 168064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // optimization. The input vectors to the shuffle might be a different 168164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // length from the shuffle outputs. 
Unfortunately, the replacement 168264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // shuffle mask has already been formed, and the mask entries are sensitive 168364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // to the sizes of the inputs. 168464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool IsSizeChangeShuffle = 168564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel isa<ShuffleVectorInst>(L) && 168664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel (LOp->getType() != L->getType() || HOp->getType() != H->getType()); 168764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 168864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) { 168964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We can have at most two unique vector inputs. 169064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool CanUseInputs = true; 169164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I1, *I2 = 0; 169264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (LEE) { 169364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = LEE->getOperand(0); 169464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 169564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = LSV->getOperand(0); 169664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = LSV->getOperand(1); 169764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I2 == I1 || isa<UndefValue>(I2)) 169864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = 0; 169964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 170064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 170164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (HEE) { 170264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I3 = HEE->getOperand(0); 170364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I3 != I1) 170464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I3; 170564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I3 != I1 && I3 != I2) 
170664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 170764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 170864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I3 = HSV->getOperand(0); 170964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I3 != I1) 171064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I3; 171164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I3 != I1 && I3 != I2) 171264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 171364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 171464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (CanUseInputs) { 171564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I4 = HSV->getOperand(1); 171664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!isa<UndefValue>(I4)) { 171764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I4 != I1) 171864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I4; 171964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I4 != I1 && I4 != I2) 172064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 172164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 172264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 172364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 172464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 172564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (CanUseInputs) { 172664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned LOpElem = 172764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType()) 172864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ->getNumElements(); 172964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned HOpElem = 173064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType()) 173164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ->getNumElements(); 
173264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 173364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We have one or two input vectors. We need to map each index of the 173464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // operands to the index of the original vector. 173564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel SmallVector<std::pair<int, int>, 8> II(numElem); 173664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemL; ++i) { 173764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx, INum; 173864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (LEE) { 173964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = 174064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(LEE->getOperand(1))->getSExtValue(); 174164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LEE->getOperand(0) == I1 ? 0 : 1; 174264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 174364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = LSV->getMaskValue(i); 174464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx < (int) LOpElem) { 174564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LSV->getOperand(0) == I1 ? 0 : 1; 174664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 174764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx -= LOpElem; 174864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LSV->getOperand(1) == I1 ? 
0 : 1; 174964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 175064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 175164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 175264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel II[i] = std::pair<int, int>(Idx, INum); 175364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 175464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemH; ++i) { 175564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx, INum; 175664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (HEE) { 175764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = 175864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(HEE->getOperand(1))->getSExtValue(); 175964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HEE->getOperand(0) == I1 ? 0 : 1; 176064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 176164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = HSV->getMaskValue(i); 176264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx < (int) HOpElem) { 176364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HSV->getOperand(0) == I1 ? 0 : 1; 176464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 176564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx -= HOpElem; 176664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HSV->getOperand(1) == I1 ? 0 : 1; 176764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 176864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 176964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 177064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel II[i + numElemL] = std::pair<int, int>(Idx, INum); 177164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 177264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 177364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We now have an array which tells us from which index of which 177464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // input vector each element of the operand comes. 
177564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *I1T = cast<VectorType>(I1->getType()); 177664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned I1Elem = I1T->getNumElements(); 177764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 177864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2) { 177964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // In this case there is only one underlying vector input. Check for 178064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // the trivial case where we can use the input directly. 178164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I1Elem == numElem) { 178264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool ElemInOrder = true; 178364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElem; ++i) { 178464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (II[i].first != (int) i && II[i].first != -1) { 178564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ElemInOrder = false; 178664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel break; 178764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 178864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 178964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 179064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ElemInOrder) 179164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return I1; 179264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 179364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 179464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // A shuffle is needed. 
179564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElem); 179664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElem; ++i) { 179764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx = II[i].first; 179864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx == -1) 179964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[i] = UndefValue::get(Type::getInt32Ty(Context)); 180064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 180164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 180264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 180364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 180464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = 180564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, UndefValue::get(I1T), 180664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 180764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o)); 180864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel S->insertBefore(J); 180964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 181064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 181164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 181264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *I2T = cast<VectorType>(I2->getType()); 181364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned I2Elem = I2T->getNumElements(); 181464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 181564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // This input comes from two distinct vectors. The first step is to 181664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // make sure that both vectors are the same length. If not, the 181764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // smaller one will need to grow before they can be shuffled together. 
181864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I1Elem < I2Elem) { 181964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(I2Elem); 182064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 182164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I1Elem; ++v) 182264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 182364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I2Elem; ++v) 182464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 182564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 182664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewI1 = 182764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, UndefValue::get(I1T), 182864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 182964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o, 1)); 183064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NewI1->insertBefore(J); 183164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = NewI1; 183264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1T = I2T; 183364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1Elem = I2Elem; 183464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (I1Elem > I2Elem) { 183564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(I1Elem); 183664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 183764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I2Elem; ++v) 183864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 183964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I1Elem; ++v) 184064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 184164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 
184264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewI2 = 184364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I2, UndefValue::get(I2T), 184464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 184564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o, 1)); 184664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NewI2->insertBefore(J); 184764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = NewI2; 184864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2T = I1T; 184964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2Elem = I1Elem; 185064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 185164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 185264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // Now that both I1 and I2 are the same length we can shuffle them 185364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // together (and use the result). 185464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElem); 185564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElem; ++v) { 185664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (II[v].first == -1) { 185764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 185864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 185964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx = II[v].first + II[v].second * I1Elem; 186064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 186164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 186264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 186364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 186464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewOp = 186564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask), 
186664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o)); 186764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NewOp->insertBefore(J); 186864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return NewOp; 186964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 1870de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1871de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 187264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgType = ArgTypeL; 187364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL < numElemH) { 187464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH, 187564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ArgTypeL, VArgType, 1)) { 187664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // This is another short-circuit case: we're combining a scalar into 187764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // a vector that is formed by an IE chain. We've just expanded the IE 187864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // chain, now insert the scalar and we're done. 187964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 188064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = InsertElementInst::Create(HOp, LOp, CV0, 188164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o)); 188264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel S->insertBefore(J); 188364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 188464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL, 188564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ArgTypeH)) { 188664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // The two vector inputs to the shuffle must be the same length, 188764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // so extend the smaller vector to be the same length as the larger one. 
188864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NLOp; 188964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL > 1) { 189064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 189164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElemH); 189264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 189364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemL; ++v) 189464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 189564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemH; ++v) 189664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 189764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 189864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL), 189964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 190064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o, 1)); 190164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 190264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0, 190364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o, 1)); 190464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 190564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 190664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NLOp->insertBefore(J); 190764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LOp = NLOp; 190864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 190964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 191064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ArgType = ArgTypeH; 191164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (numElemL > numElemH) { 191264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemH == 1 && 
expandIEChain(Context, I, J, o, LOp, numElemL, 191364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ArgTypeH, VArgType)) { 191464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = 191564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel InsertElementInst::Create(LOp, HOp, 191664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantInt::get(Type::getInt32Ty(Context), 191764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL), 191864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o)); 191964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel S->insertBefore(J); 192064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 192164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH, 192264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ArgTypeL)) { 192364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NHOp; 192464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemH > 1) { 192564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElemL); 192664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 192764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemH; ++v) 192864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 192964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemL; ++v) 193064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 193164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 193264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH), 193364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 193464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o, 1)); 193564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 
193664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0, 193764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o, 1)); 193864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 193964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 194064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NHOp->insertBefore(J); 194164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel HOp = NHOp; 1942de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 194364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 1944de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 194564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgType->isVectorTy()) { 194664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = cast<VectorType>(VArgType)->getNumElements(); 194764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask(numElem); 194864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElem; ++v) { 194964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned Idx = v; 195064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // If the low vector was expanded, we need to skip the extra 195164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // undefined entries. 
195264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (v >= numElemL && numElemH > numElemL) 195364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx += (numElemH - numElemL); 195464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 195564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 1956de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 195764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *BV = new ShuffleVectorInst(LOp, HOp, 195864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 195964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(I, true, o)); 1960de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BV->insertBefore(J); 1961de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV; 1962de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1963de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1964de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *BV1 = InsertElementInst::Create( 196564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel UndefValue::get(VArgType), LOp, CV0, 1966de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(I, true, o, 1)); 1967de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BV1->insertBefore(I); 196864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1, 1969de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(I, true, o, 2)); 1970de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BV2->insertBefore(J); 1971de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV2; 1972de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1973de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1974de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function creates an array of values that will be used as the inputs 1975de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to the vector instruction that fuses I with J. 
1976de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, 1977de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J, 1978de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SmallVector<Value *, 3> &ReplacedOperands, 1979282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel bool FlipMemInputs) { 1980de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumOperands = I->getNumOperands(); 1981de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1982de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { 1983de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate backward so that we look at the store pointer 1984de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // first and know whether or not we need to flip the inputs. 1985de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1986de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) { 1987de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This is the pointer for a load/store instruction. 
1988de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o, 1989de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlipMemInputs); 1990de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 19916173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } else if (isa<CallInst>(I)) { 1992de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Function *F = cast<CallInst>(I)->getCalledFunction(); 1993de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IID = F->getIntrinsicID(); 19946173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel if (o == NumOperands-1) { 19956173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel BasicBlock &BB = *I->getParent(); 1996bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 19976173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Module *M = BB.getParent()->getParent(); 199864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getType(); 199964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getType(); 200064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 2001bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 20026173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel ReplacedOperands[o] = Intrinsic::getDeclaration(M, 20036173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel (Intrinsic::ID) IID, VArgType); 20046173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel continue; 20056173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } else if (IID == Intrinsic::powi && o == 1) { 20066173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // The second argument of powi is a single integer and we've already 20076173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // checked that both arguments are equal. As a result, we just keep 20086173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // I's second argument. 
20096173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel ReplacedOperands[o] = I->getOperand(o); 20106173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel continue; 20116173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } 2012de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) { 2013de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J); 2014de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2015de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2016de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2017de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ReplacedOperands[o] = 2018de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementInput(Context, I, J, o, FlipMemInputs); 2019de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2020de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2021de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2022de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function creates two values that represent the outputs of the 2023de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // original I and J instructions. These are generally vector shuffles 2024de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // or extracts. In many cases, these will end up being unused and, thus, 2025de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // eliminated by later passes. 
2026de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I, 2027de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, Instruction *K, 2028de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 2029de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&K1, Instruction *&K2, 2030282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel bool FlipMemInputs) { 2031de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<StoreInst>(I)) { 2032de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(I, K); 2033de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(J, K); 2034de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2035de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *IType = I->getType(); 203664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *JType = J->getType(); 203764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 203864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *VType = getVecTypeForPair(IType, JType); 203964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = VType->getNumElements(); 204064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 204164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemI, numElemJ; 204264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (IType->isVectorTy()) 204364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemI = cast<VectorType>(IType)->getNumElements(); 204464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 204564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemI = 1; 204664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 204764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (JType->isVectorTy()) 204864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemJ = cast<VectorType>(JType)->getNumElements(); 204964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 
205064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemJ = 1; 2051de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2052de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IType->isVectorTy()) { 205364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask1(numElemI), Mask2(numElemI); 205464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElemI; ++v) { 205564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 205664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v); 205764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2058de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 205964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel K1 = new ShuffleVectorInst(K, UndefValue::get(VType), 206064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get( 206164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel FlipMemInputs ? Mask2 : Mask1), 206264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(K, false, 1)); 2063de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 206464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); 206564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); 2066de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K1 = ExtractElementInst::Create(K, FlipMemInputs ? 
CV1 : CV0, 2067de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 1)); 206864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 206964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 207064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (JType->isVectorTy()) { 207164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ); 207264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElemJ; ++v) { 207364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 207464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v); 207564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 207664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 207764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel K2 = new ShuffleVectorInst(K, UndefValue::get(VType), 207864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get( 207964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel FlipMemInputs ? Mask1 : Mask2), 208064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(K, false, 2)); 208164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 208264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); 208364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); 2084de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K2 = ExtractElementInst::Create(K, FlipMemInputs ? 
CV0 : CV1, 2085de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 2)); 2086de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2087de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2088de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K1->insertAfter(K); 2089de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K2->insertAfter(K1); 2090de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InsertionPt = K2; 2091de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2092de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2093de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2094de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move all uses of the function I (including pairing-induced uses) after J. 2095de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB, 2096de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 2097de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 2098de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 
2099ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2100de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (; cast<Instruction>(L) != J; ++L) 2104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet); 2105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel assert(cast<Instruction>(L) == J && 2107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel "Tracking has not proceeded far enough to check for dependencies"); 2108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If J is now in the use set of I, then trackUsesOfI will return true 2109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // and we have a dependency cycle (and the fusing operation must abort). 2110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSet); 2111de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2112de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2113de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move all uses of the function I (including pairing-induced uses) after J. 2114de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB, 2115de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 2116de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 2117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 2118de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 
2119ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (; cast<Instruction>(L) != J;) { 2124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet)) { 2125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move this instruction 2126de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *InstToMove = L; ++L; 2127de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2128de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: moving: " << *InstToMove << 2129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " to after " << *InsertionPt << "\n"); 2130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InstToMove->removeFromParent(); 2131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InstToMove->insertAfter(InsertionPt); 2132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InsertionPt = InstToMove; 2133de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2134de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++L; 2135de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2136de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2137de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Collect all load instruction that are in the move set of a given first 2140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair member. These loads depend on the first instruction, I, and so need 2141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to be moved after J (the second instruction) when the pair is fused. 
2142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB, 2143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 2144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 2145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I) { 2146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 2147ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2148de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2149de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2150de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: We cannot end the loop when we reach J because J could be moved 2153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // farther down the use chain by another instruction pairing. Also, J 2154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // could be before I if this is an inverted input. 
2155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) { 2156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (trackUsesOfI(Users, WriteSet, I, L)) { 2157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (L->mayReadFromMemory()) 2158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LoadMoveSet.insert(ValuePair(L, I)); 2159de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2162de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // In cases where both load/stores and the computation of their pointers 2164de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // are chosen for vectorization, we can end up in a situation where the 2165de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // aliasing analysis starts returning different query results as the 2166de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // process of fusing instruction pairs continues. Because the algorithm 2167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // relies on finding the same use trees here as were found earlier, we'll 2168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // need to precompute the necessary aliasing information here and then 2169de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // manually update it during the fusion process. 
2170de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::collectLoadMoveSet(BasicBlock &BB, 2171de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 2172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 2173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet) { 2174de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator PI = PairableInsts.begin(), 2175de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PIE = PairableInsts.end(); PI != PIE; ++PI) { 2176de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI); 2177de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (P == ChosenPairs.end()) continue; 2178de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2179de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I = cast<Instruction>(P->first); 2180de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, I); 2181de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2182de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2184282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel // As with the aliasing information, SCEV can also change because of 2185282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel // vectorization. This information is used to compute relative pointer 2186282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel // offsets; the necessary information will be cached here prior to 2187282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel // fusion. 
2188282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel void BBVectorize::collectPtrInfo(std::vector<Value *> &PairableInsts, 2189282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel DenseMap<Value *, Value *> &ChosenPairs, 2190282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel DenseSet<Value *> &LowPtrInsts) { 2191282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel for (std::vector<Value *>::iterator PI = PairableInsts.begin(), 2192282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel PIE = PairableInsts.end(); PI != PIE; ++PI) { 2193282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI); 2194282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel if (P == ChosenPairs.end()) continue; 2195282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2196282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel Instruction *I = cast<Instruction>(P->first); 2197282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel Instruction *J = cast<Instruction>(P->second); 2198282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2199282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) 2200282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel continue; 2201282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2202282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel Value *IPtr, *JPtr; 2203282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel unsigned IAlignment, JAlignment; 2204282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel int64_t OffsetInElmts; 2205282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, 2206282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel OffsetInElmts) || abs64(OffsetInElmts) != 1) 2207282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel llvm_unreachable("Pre-fusion pointer analysis failed"); 2208282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2209282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel Value *LowPI = (OffsetInElmts > 0) ? 
I : J; 2210282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel LowPtrInsts.insert(LowPI); 2211282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel } 2212282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel } 2213282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2214ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // When the first instruction in each pair is cloned, it will inherit its 2215ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // parent's metadata. This metadata must be combined with that of the other 2216ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // instruction in a safe way. 2217ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel void BBVectorize::combineMetadata(Instruction *K, const Instruction *J) { 2218ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata; 2219ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->getAllMetadataOtherThanDebugLoc(Metadata); 2220ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel for (unsigned i = 0, n = Metadata.size(); i < n; ++i) { 2221ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel unsigned Kind = Metadata[i].first; 2222ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel MDNode *JMD = J->getMetadata(Kind); 2223ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel MDNode *KMD = Metadata[i].second; 2224ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 2225ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel switch (Kind) { 2226ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel default: 2227ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, 0); // Remove unknown metadata 2228ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2229ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel case LLVMContext::MD_tbaa: 2230ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); 2231ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2232ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 
case LLVMContext::MD_fpmath: 2233ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD)); 2234ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2235ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2236ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2237ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2238ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 2239de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function fuses the chosen instruction pairs into vector instructions, 2240de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // taking care preserve any needed scalar outputs and, then, it reorders the 2241de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // remaining instructions as needed (users of the first member of the pair 2242de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // need to be moved to after the location of the second member of the pair 2243de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // because the vector instruction is inserted in the location of the pair's 2244de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // second member). 2245de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::fuseChosenPairs(BasicBlock &BB, 2246de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 2247de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs) { 2248de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LLVMContext& Context = BB.getContext(); 2249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2250de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // During the vectorization process, the order of the pairs to be fused 2251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // could be flipped. So we'll add each pair, flipped, into the ChosenPairs 2252de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // list. After a pair is fused, the flipped pair is removed from the list. 
2253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<ValuePair> FlippedPairs; 2254de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlippedPairs.reserve(ChosenPairs.size()); 2255de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(), 2256de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = ChosenPairs.end(); P != E; ++P) 2257de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlippedPairs.push_back(ValuePair(P->second, P->first)); 2258de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<ValuePair>::iterator P = FlippedPairs.begin(), 2259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = FlippedPairs.end(); P != E; ++P) 2260de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.insert(*P); 2261de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> LoadMoveSet; 2263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); 2264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2265282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel DenseSet<Value *> LowPtrInsts; 2266282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts); 2267282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2268de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); 2269de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2270de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { 2271de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(PI); 2272de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (P == ChosenPairs.end()) { 2273de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2274de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 
continue; 2275de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2276de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2277de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (getDepthFactor(P->first) == 0) { 2278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // These instructions are not really fused, but are tracked as though 2279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // they are. Any case in which it would be interesting to fuse them 2280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // will be taken care of by InstCombine. 2281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel --NumFusedOps; 2282de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2283de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2284de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2285de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2286de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I = cast<Instruction>(P->first), 2287de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel *J = cast<Instruction>(P->second); 2288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2289de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: fusing: " << *I << 2290de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << "\n"); 2291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2292de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Remove the pair and flipped pair from the list. 
2293de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator FP = ChosenPairs.find(P->second); 2294de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel assert(FP != ChosenPairs.end() && "Flipped pair not found in list"); 2295de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.erase(FP); 2296de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.erase(P); 2297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2298de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!canMoveUsesOfIAfterJ(BB, LoadMoveSet, I, J)) { 2299de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: fusion of: " << *I << 2300de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << 2301de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " aborted because of non-trivial dependency cycle\n"); 2302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel --NumFusedOps; 2303de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2304de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2305de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2306de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2307282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel bool FlipMemInputs = false; 2308282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel if (isa<LoadInst>(I) || isa<StoreInst>(I)) 2309282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end()); 2310282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2311de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumOperands = I->getNumOperands(); 2312de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SmallVector<Value *, 3> ReplacedOperands(NumOperands); 2313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementInputsForPair(Context, I, J, ReplacedOperands, 2314de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlipMemInputs); 2315de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 
2316de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Make a copy of the original operation, change its type to the vector 2317de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // type and replace its operands with the vector operands. 2318de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *K = I->clone(); 2319de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I->hasName()) K->takeName(I); 2320de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2321de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isa<StoreInst>(K)) 232264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel K->mutateType(getVecTypeForPair(I->getType(), J->getType())); 2323de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2324ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel combineMetadata(K, J); 2325ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 2326de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned o = 0; o < NumOperands; ++o) 2327de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->setOperand(o, ReplacedOperands[o]); 2328de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2329de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If we've flipped the memory inputs, make sure that we take the correct 2330de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // alignment. 
2331de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (FlipMemInputs) { 2332de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<StoreInst>(K)) 2333de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cast<StoreInst>(K)->setAlignment(cast<StoreInst>(J)->getAlignment()); 2334de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 2335de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cast<LoadInst>(K)->setAlignment(cast<LoadInst>(J)->getAlignment()); 2336de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2337de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2338de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->insertAfter(J); 2339de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2340de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Instruction insertion point: 2341de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *InsertionPt = K; 2342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *K1 = 0, *K2 = 0; 2343de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel replaceOutputsOfPair(Context, I, J, K, InsertionPt, K1, K2, 2344de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel FlipMemInputs); 2345de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2346de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The use tree of the first original instruction must be moved to after 2347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the location of the second instruction. The entire use tree of the 2348de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // first instruction is disjoint from the input tree of the second 2349de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // (by definition), and so commutes with it. 
2350de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2351de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J); 2352de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2353de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isa<StoreInst>(I)) { 2354de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel I->replaceAllUsesWith(K1); 2355de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel J->replaceAllUsesWith(K2); 2356de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(I, K1); 2357de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(J, K2); 2358de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2359de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2360de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Instructions that may read from memory may be in the load move set. 2361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Once an instruction is fused, we no longer need its move set, and so 2362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the values of the map never need to be updated. However, when a load 2363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is fused, we need to merge the entries from both instructions in the 2364de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair in case those instructions were in the move set of some other 2365de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // yet-to-be-fused pair. The loads in question are the keys of the map. 
2366de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I->mayReadFromMemory()) { 2367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<ValuePair> NewSetMembers; 2368de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair IPairRange = LoadMoveSet.equal_range(I); 2369de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair JPairRange = LoadMoveSet.equal_range(J); 2370de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator N = IPairRange.first; 2371de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel N != IPairRange.second; ++N) 2372de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel NewSetMembers.push_back(ValuePair(K, N->second)); 2373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator N = JPairRange.first; 2374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel N != JPairRange.second; ++N) 2375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel NewSetMembers.push_back(ValuePair(K, N->second)); 2376de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(), 2377de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AE = NewSetMembers.end(); A != AE; ++A) 2378de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LoadMoveSet.insert(*A); 2379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2380de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2381de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Before removing I, set the iterator to the next instruction. 
2382de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PI = llvm::next(BasicBlock::iterator(I)); 2383de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (cast<Instruction>(PI) == J) 2384de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2385de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2386de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE->forgetValue(I); 2387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE->forgetValue(J); 2388de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel I->eraseFromParent(); 2389de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel J->eraseFromParent(); 2390de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2391de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2392de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: final: \n" << BB << "\n"); 2393de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2394de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel} 2395de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2396de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelchar BBVectorize::ID = 0; 2397de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic const char bb_vectorize_name[] = "Basic-Block Vectorization"; 2398de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) 2399de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 2400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 2401de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) 2402de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2403bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengBasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) { 2404bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng return new BBVectorize(C); 2405de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel} 
2406de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2407bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengbool 2408bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengllvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) { 2409bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize BBVectorizer(P, C); 241087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng return BBVectorizer.vectorizeBB(BB); 241187825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng} 2412bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 2413bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng//===----------------------------------------------------------------------===// 2414bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengVectorizeConfig::VectorizeConfig() { 2415bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng VectorBits = ::VectorBits; 2416768edf3cd037aab10391abc279f71470df8e3156Hal Finkel VectorizeBools = !::NoBools; 241786312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeInts = !::NoInts; 241886312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeFloats = !::NoFloats; 2419f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel VectorizePointers = !::NoPointers; 242086312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeCasts = !::NoCasts; 242186312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeMath = !::NoMath; 242286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeFMA = !::NoFMA; 2423fc3665c87519850f629c9565535e3be447e10addHal Finkel VectorizeSelect = !::NoSelect; 2424e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel VectorizeCmp = !::NoCmp; 2425f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel VectorizeGEP = !::NoGEP; 242686312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeMemOps = !::NoMemOps; 2427bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng AlignedOnly = ::AlignedOnly; 2428bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng ReqChainDepth= ::ReqChainDepth; 
2429bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng SearchLimit = ::SearchLimit; 2430bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck; 2431bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng SplatBreaksChain = ::SplatBreaksChain; 2432bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxInsts = ::MaxInsts; 2433bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxIter = ::MaxIter; 243464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Pow2LenOnly = ::Pow2LenOnly; 2435bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng NoMemOpBoost = ::NoMemOpBoost; 2436bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng FastDep = ::FastDep; 2437bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng} 2438