BBVectorize.cpp revision 4387b8c95971a512e07bfda30dea6459e8419e8f
//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a basic-block vectorization pass. The algorithm was
// inspired by that used by the Vienna MAP Vectorizer by Franchetti and Kral,
// et al. It works by looking for chains of pairable operations and then
// pairing them.
14de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// 15de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//===----------------------------------------------------------------------===// 16de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 17de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define BBV_NAME "bb-vectorize" 18de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define DEBUG_TYPE BBV_NAME 19de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Constants.h" 20de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/DerivedTypes.h" 21de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Function.h" 22de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Instructions.h" 23de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/IntrinsicInst.h" 24de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Intrinsics.h" 25de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/LLVMContext.h" 26ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel#include "llvm/Metadata.h" 27de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Pass.h" 28de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Type.h" 29de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseMap.h" 30de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseSet.h" 3186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel#include "llvm/ADT/SmallSet.h" 32de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/SmallVector.h" 33de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/Statistic.h" 34de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/STLExtras.h" 35de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/StringExtras.h" 36de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasAnalysis.h" 37de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasSetTracker.h" 
38e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel#include "llvm/Analysis/Dominators.h" 39de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolution.h" 40de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolutionExpressions.h" 41de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ValueTracking.h" 42de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/CommandLine.h" 43de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/Debug.h" 44de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/raw_ostream.h" 45de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/ValueHandle.h" 463574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow#include "llvm/DataLayout.h" 4765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel#include "llvm/TargetTransformInfo.h" 4864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel#include "llvm/Transforms/Utils/Local.h" 49de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Transforms/Vectorize.h" 50de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include <algorithm> 51de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include <map> 52de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelusing namespace llvm; 53de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 5465309660fa61a837cc05323f69c618a7d8134d56Hal Finkelstatic cl::opt<bool> 5565309660fa61a837cc05323f69c618a7d8134d56Hal FinkelIgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false), 5665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel cl::Hidden, cl::desc("Ignore target information")); 5765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 58de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 59de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden, 60de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The required chain 
depth for vectorization")); 61de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 6278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkelstatic cl::opt<bool> 6378fd353d5e5daedc47ecc31b6193ca48793c249cHal FinkelUseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false), 6478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel cl::Hidden, cl::desc("Use the chain depth requirement with" 6578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel " target information")); 6678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 67de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 68de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden, 69de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The maximum search distance for instruction pairs")); 70de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 71de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 72de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden, 73de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Replicating one element to a pair breaks the chain")); 74de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 75de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 76de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelVectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden, 77de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The size of the native vector registers")); 78de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 79de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 80de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, 81de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The maximum number of pairing iterations")); 82de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 
8364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkelstatic cl::opt<bool> 8464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal FinkelPow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden, 8564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cl::desc("Don't try to form non-2^n-length vectors")); 8664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 87de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 885d4e18bc39fea892f523d960213906d296d3cb38Hal FinkelMaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, 895d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel cl::desc("The maximum number of pairable instructions per group")); 905d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 915d4e18bc39fea892f523d960213906d296d3cb38Hal Finkelstatic cl::opt<unsigned> 92de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), 93de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use" 94de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " a full cycle check")); 95de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 96de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 97768edf3cd037aab10391abc279f71470df8e3156Hal FinkelNoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden, 98768edf3cd037aab10391abc279f71470df8e3156Hal Finkel cl::desc("Don't try to vectorize boolean (i1) values")); 99768edf3cd037aab10391abc279f71470df8e3156Hal Finkel 100768edf3cd037aab10391abc279f71470df8e3156Hal Finkelstatic cl::opt<bool> 101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden, 102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize integer values")); 103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 
105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden, 106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize floating-point values")); 107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 108822ab00847da841a63be4e3883cb5f442dc69069Hal Finkel// FIXME: This should default to false once pointer vector support works. 109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 110822ab00847da841a63be4e3883cb5f442dc69069Hal FinkelNoPointers("bb-vectorize-no-pointers", cl::init(/*false*/ true), cl::Hidden, 111f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel cl::desc("Don't try to vectorize pointer values")); 112f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 113f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkelstatic cl::opt<bool> 114de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden, 115de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize casting (conversion) operations")); 116de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 118de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden, 119de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize floating-point math intrinsics")); 120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, 123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize the fused-multiply-add intrinsic")); 124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 126fc3665c87519850f629c9565535e3be447e10addHal 
FinkelNoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, 127fc3665c87519850f629c9565535e3be447e10addHal Finkel cl::desc("Don't try to vectorize select instructions")); 128fc3665c87519850f629c9565535e3be447e10addHal Finkel 129fc3665c87519850f629c9565535e3be447e10addHal Finkelstatic cl::opt<bool> 130e415f96b6a43ac8861148a11a4258bc38c247e8fHal FinkelNoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden, 131e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel cl::desc("Don't try to vectorize comparison instructions")); 132e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel 133e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkelstatic cl::opt<bool> 134f3f5a1e6f77a842ccb24cc81766437da5197d712Hal FinkelNoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, 135f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel cl::desc("Don't try to vectorize getelementptr instructions")); 136f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 137f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkelstatic cl::opt<bool> 138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, 139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize loads and stores")); 140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelAlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden, 143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Only generate aligned loads and stores")); 144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 146edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal FinkelNoMemOpBoost("bb-vectorize-no-mem-op-boost", 147edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel cl::init(false), cl::Hidden, 148edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel cl::desc("Don't boost the 
chain-depth contribution of loads and stores")); 149edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel 150edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkelstatic cl::opt<bool> 151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelFastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden, 152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Use a fast instruction dependency analysis")); 153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#ifndef NDEBUG 155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugInstructionExamination("bb-vectorize-debug-instruction-examination", 157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 159de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " instruction-examination process")); 160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCandidateSelection("bb-vectorize-debug-candidate-selection", 162de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 164de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " candidate-selection process")); 165de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 166de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugPairSelection("bb-vectorize-debug-pair-selection", 167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 169de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " pair-selection process")); 
170de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 171de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCycleCheck("bb-vectorize-debug-cycle-check", 172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 174de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " cycle-checking process")); 17572465ea23d010507d3746adc126d719005981e05Hal Finkel 17672465ea23d010507d3746adc126d719005981e05Hal Finkelstatic cl::opt<bool> 17772465ea23d010507d3746adc126d719005981e05Hal FinkelPrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair", 17872465ea23d010507d3746adc126d719005981e05Hal Finkel cl::init(false), cl::Hidden, 17972465ea23d010507d3746adc126d719005981e05Hal Finkel cl::desc("When debugging is enabled, dump the basic block after" 18072465ea23d010507d3746adc126d719005981e05Hal Finkel " every pair is fused")); 181de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#endif 182de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSTATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize"); 184de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 185de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelnamespace { 186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel struct BBVectorize : public BasicBlockPass { 187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel static char ID; // Pass identification, replacement for typeid 188bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 189940371bc65570ec0add1ede4f4d9f0a41ba25e09Hongbin Zheng const VectorizeConfig Config; 190bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 191bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize(const VectorizeConfig &C = VectorizeConfig()) 192bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng : BasicBlockPass(ID), Config(C) { 
193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel initializeBBVectorizePass(*PassRegistry::getPassRegistry()); 194de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 196bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize(Pass *P, const VectorizeConfig &C) 197bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng : BasicBlockPass(ID), Config(C) { 19887825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng AA = &P->getAnalysis<AliasAnalysis>(); 199e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DT = &P->getAnalysis<DominatorTree>(); 20087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng SE = &P->getAnalysis<ScalarEvolution>(); 2013574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow TD = P->getAnalysisIfAvailable<DataLayout>(); 20265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel TTI = IgnoreTargetInfo ? 0 : 20365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel P->getAnalysisIfAvailable<TargetTransformInfo>(); 20465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel VTTI = TTI ? 
TTI->getVectorTargetTransformInfo() : 0; 20587825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng } 20687825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<Value *, Value *> ValuePair; 20865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel typedef std::pair<ValuePair, int> ValuePairWithCost; 209de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<ValuePair, size_t> ValuePairWithDepth; 210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair 21172465ea23d010507d3746adc126d719005981e05Hal Finkel typedef std::pair<VPPair, unsigned> VPPairWithType; 212de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<std::multimap<Value *, Value *>::iterator, 213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *>::iterator> VPIteratorPair; 214de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator, 215de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair>::iterator> 216de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair; 217de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 218de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasAnalysis *AA; 219e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DominatorTree *DT; 220de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ScalarEvolution *SE; 2213574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow DataLayout *TD; 22265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel TargetTransformInfo *TTI; 22365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel const VectorTargetTransformInfo *VTTI; 224de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 225de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: const correct? 
226de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 22764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false); 228de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2295d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool getCandidatePairs(BasicBlock &BB, 2305d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator &Start, 231de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 232a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 23365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 23464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Value *> &PairableInsts, bool NonPow2Len); 235de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 23678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // FIXME: The current implementation does not account for pairs that 23778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // are connected in multiple ways. 
For example: 23878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap) 23972465ea23d010507d3746adc126d719005981e05Hal Finkel enum PairConnectionType { 24072465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionDirect, 24172465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionSwap, 24272465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionSplat 24372465ea23d010507d3746adc126d719005981e05Hal Finkel }; 24472465ea23d010507d3746adc126d719005981e05Hal Finkel 245de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs, 246de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 24772465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 24872465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes); 249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 250de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void buildDepMap(BasicBlock &BB, 251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 252de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers); 254de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 255de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void choosePairs(std::multimap<Value *, Value *> &CandidatePairs, 25665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 257de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 25886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 25986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 
260de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 26186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, 262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>& ChosenPairs); 264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 265de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void fuseChosenPairs(BasicBlock &BB, 266de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 267a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseMap<Value *, Value *>& ChosenPairs, 26872465ea23d010507d3746adc126d719005981e05Hal Finkel DenseSet<ValuePair> &FixedOrderPairs, 26972465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 27072465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 27172465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairDeps); 27272465ea23d010507d3746adc126d719005981e05Hal Finkel 273de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 274de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); 275de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 276de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool areInstsCompatible(Instruction *I, Instruction *J, 27765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel bool IsSimpleLoadStore, bool NonPow2Len, 278a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel int &CostSavings, int &FixedOrder); 279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool trackUsesOfI(DenseSet<Value *> &Users, 281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker &WriteSet, Instruction *I, 
282de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, bool UpdateUsers = true, 283de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> *LoadMoveSet = 0); 2841230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 285de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void computePairsConnectedTo( 286de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 287de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 28972465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 290de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePair P); 291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 292de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool pairsConflict(ValuePair P, ValuePair Q, 293de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 294de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0); 295de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 296de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool pairWillFormCycle(ValuePair P, 297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUsers, 298de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &CurrentPairs); 299de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 300de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void pruneTreeFor( 301de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 303de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 304de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal 
Finkel DenseSet<ValuePair> &PairableInstUsers, 305de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 306de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 307de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, 308de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PrunedTree, ValuePair J, 309de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck); 310de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 311de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void buildInitialTreeFor( 312de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 314de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 315de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 316de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 317de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, ValuePair J); 318de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 319de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void findBestTreeFor( 320de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 32165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 322de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 32386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 32486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 325de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 
32686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, 327de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 328de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 329de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 330de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, 33165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel int &BestEffSize, VPIteratorPair ChoiceRange, 332de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck); 333de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 334de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, 335202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *J, unsigned o); 336de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 337de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void fillNewShuffleMask(LLVMContext& Context, Instruction *J, 33864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaskOffset, unsigned NumInElem, 33964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElem1, unsigned IdxOffset, 34064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> &Mask); 341de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I, 343de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J); 344de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 34564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J, 34664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned o, Value *&LOp, unsigned numElemL, 34772465ea23d010507d3746adc126d719005981e05Hal Finkel Type *ArgTypeL, 
Type *ArgTypeR, bool IBeforeJ, 34864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned IdxOff = 0); 34964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 350de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementInput(LLVMContext& Context, Instruction *I, 35172465ea23d010507d3746adc126d719005981e05Hal Finkel Instruction *J, unsigned o, bool IBeforeJ); 352de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 353de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, 35472465ea23d010507d3746adc126d719005981e05Hal Finkel Instruction *J, SmallVector<Value *, 3> &ReplacedOperands, 35572465ea23d010507d3746adc126d719005981e05Hal Finkel bool IBeforeJ); 356de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 357de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, 358de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, Instruction *K, 359de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, Instruction *&K1, 360202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *&K2); 361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void collectPairLoadMoveSet(BasicBlock &BB, 363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 364de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 365de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I); 366de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void collectLoadMoveSet(BasicBlock &BB, 368de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 369de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 370de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, 
Value *> &LoadMoveSet); 371de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 372de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool canMoveUsesOfIAfterJ(BasicBlock &BB, 373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J); 375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 376de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void moveUsesOfIAfterJ(BasicBlock &BB, 377de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 378de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J); 380de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 381ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel void combineMetadata(Instruction *K, const Instruction *J); 382ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 38387825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng bool vectorizeBB(BasicBlock &BB) { 384e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel if (!DT->isReachableFromEntry(&BB)) { 385e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DEBUG(dbgs() << "BBV: skipping unreachable " << BB.getName() << 386e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel " in " << BB.getParent()->getName() << "\n"); 387e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel return false; 388e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel } 389e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel 39065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DEBUG(if (VTTI) dbgs() << "BBV: using target information\n"); 39165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 392de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool changed = false; 393de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate a sufficient number of times to merge types of size 1 bit, 394de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal 
Finkel // then 2 bits, then 4, etc. up to half of the target vector width of the 395de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // target vector register. 39664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned n = 1; 39764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 2; 39865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel (VTTI || v <= Config.VectorBits) && 39965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel (!Config.MaxIter || n <= Config.MaxIter); 400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel v *= 2, ++n) { 401bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng DEBUG(dbgs() << "BBV: fusing loop #" << n << 402de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " for " << BB.getName() << " in " << 403de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BB.getParent()->getName() << "...\n"); 40486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkelassert(n < 10 && "hrmm, really?"); 405de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (vectorizePairs(BB)) 406de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel changed = true; 407de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 408de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 409de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 410de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 41164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (changed && !Pow2LenOnly) { 41264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ++n; 41364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; !Config.MaxIter || n <= Config.MaxIter; ++n) { 41464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " << 41564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel n << " for " << BB.getName() << " in " << 41664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel BB.getParent()->getName() << "...\n"); 41764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!vectorizePairs(BB, true)) break; 
41864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 41964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 42064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 421de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: done!\n"); 422de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return changed; 423de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 424de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 42587825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng virtual bool runOnBasicBlock(BasicBlock &BB) { 42687825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng AA = &getAnalysis<AliasAnalysis>(); 427e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DT = &getAnalysis<DominatorTree>(); 42887825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng SE = &getAnalysis<ScalarEvolution>(); 4293574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow TD = getAnalysisIfAvailable<DataLayout>(); 43065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel TTI = IgnoreTargetInfo ? 0 : 43165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel getAnalysisIfAvailable<TargetTransformInfo>(); 43265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel VTTI = TTI ? 
TTI->getVectorTargetTransformInfo() : 0; 43387825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 43487825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng return vectorizeBB(BB); 43587825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng } 43687825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 437de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel virtual void getAnalysisUsage(AnalysisUsage &AU) const { 438de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlockPass::getAnalysisUsage(AU); 439de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addRequired<AliasAnalysis>(); 440e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel AU.addRequired<DominatorTree>(); 441de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addRequired<ScalarEvolution>(); 442de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addPreserved<AliasAnalysis>(); 443e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel AU.addPreserved<DominatorTree>(); 444de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addPreserved<ScalarEvolution>(); 4457e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel AU.setPreservesCFG(); 446de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 447de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 44864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) { 44964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() && 45064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel "Cannot form vector from incompatible scalar types"); 45164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *STy = ElemTy->getScalarType(); 45264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 45364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem; 454de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) { 45564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem = VTy->getNumElements(); 
45664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 45764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem = 1; 45864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 45964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 46064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) { 46164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem += VTy->getNumElements(); 46264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 46364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem += 1; 464de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 4657e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel 46664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return VectorType::get(STy, numElem); 46764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 46864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 46964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel static inline void getInstructionTypes(Instruction *I, 47064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *&T1, Type *&T2) { 47164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (isa<StoreInst>(I)) { 47264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // For stores, it is the value type, not the pointer type that matters 47364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // because the value is what will come from a vector register. 
47464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 47564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *IVal = cast<StoreInst>(I)->getValueOperand(); 47664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T1 = IVal->getType(); 47764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 47864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T1 = I->getType(); 47964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 48064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 48164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I->isCast()) 48264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T2 = cast<CastInst>(I)->getSrcTy(); 48364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 48464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T2 = T1; 48565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 48665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 48765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel T2 = SI->getCondition()->getType(); 4888b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) { 4898b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel T2 = SI->getOperand(0)->getType(); 4905094257518ea7b615d87ef5bea657625ffa81991Hal Finkel } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) { 4915094257518ea7b615d87ef5bea657625ffa81991Hal Finkel T2 = CI->getOperand(0)->getType(); 49265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 493de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 494de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 495de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the weight associated with the provided value. 
A chain of 496de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // candidate pairs has a length given by the sum of the weights of its 497de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // members (one weight per pair; the weight of each member of the pair 498de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is assumed to be the same). This length is then compared to the 499de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // chain-length threshold to determine if a given chain is significant 500de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // enough to be vectorized. The length is also used in comparing 501de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // candidate chains where longer chains are considered to be better. 502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: when this function returns 0, the resulting instructions are 503de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // not actually fused. 504bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng inline size_t getDepthFactor(Value *V) { 505de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // InsertElement and ExtractElement have a depth factor of zero. This is 506de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // for two reasons: First, they cannot be usefully fused. Second, because 507de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the pass generates a lot of these, they can confuse the simple metric 508de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // used to compare the trees in the next iteration. Thus, giving them a 509de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // weight of zero allows the pass to essentially ignore them in 510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // subsequent iterations when looking for vectorization opportunities 511de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // while still tracking dependency chains that flow through those 512de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instructions. 
513de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V)) 514de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return 0; 515de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 516edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel // Give a load or store half of the required depth so that load/store 517edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel // pairs will vectorize. 518bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V))) 519bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng return Config.ReqChainDepth/2; 520edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel 521de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return 1; 522de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 523de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 52446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // Returns the cost of the provided instruction using VTTI. 52546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // This does not handle loads and stores. 52646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) { 52746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel switch (Opcode) { 52846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel default: break; 52946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::GetElementPtr: 53046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // We mark this instruction as zero-cost because scalar GEPs are usually 53146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // lowered to the intruction addressing mode. At the moment we don't 53246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // generate vector GEPs. 
53346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return 0; 53446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Br: 53546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return VTTI->getCFInstrCost(Opcode); 53646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::PHI: 53746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return 0; 53846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Add: 53946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FAdd: 54046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Sub: 54146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FSub: 54246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Mul: 54346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FMul: 54446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::UDiv: 54546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SDiv: 54646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FDiv: 54746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::URem: 54846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SRem: 54946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FRem: 55046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Shl: 55146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::LShr: 55246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::AShr: 55346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::And: 55446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Or: 55546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Xor: 55646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return VTTI->getArithmeticInstrCost(Opcode, T1); 55746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Select: 55846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal 
Finkel case Instruction::ICmp: 55946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FCmp: 56046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return VTTI->getCmpSelInstrCost(Opcode, T1, T2); 56146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::ZExt: 56246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SExt: 56346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPToUI: 56446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPToSI: 56546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPExt: 56646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::PtrToInt: 56746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::IntToPtr: 56846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SIToFP: 56946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::UIToFP: 57046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Trunc: 57146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPTrunc: 57246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::BitCast: 57386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel case Instruction::ShuffleVector: 57446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return VTTI->getCastInstrCost(Opcode, T1, T2); 57546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel } 57646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel 57746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return 1; 57846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel } 57946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel 580de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This determines the relative offset of two loads or stores, returning 581de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // true if the offset could be determined to be some constant value. 
582de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For example, if OffsetInElmts == 1, then J accesses the memory directly 583de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // after I; if OffsetInElmts == -1 then I accesses the memory 58464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // directly after J. 585de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool getPairPtrInfo(Instruction *I, Instruction *J, 586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment, 58765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned &IAddressSpace, unsigned &JAddressSpace, 58893f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel int64_t &OffsetInElmts, bool ComputeOffset = true) { 589de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts = 0; 59065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 59165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel LoadInst *LJ = cast<LoadInst>(J); 59265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IPtr = LI->getPointerOperand(); 59365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JPtr = LJ->getPointerOperand(); 59465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAlignment = LI->getAlignment(); 59565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAlignment = LJ->getAlignment(); 59665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace = LI->getPointerAddressSpace(); 59765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAddressSpace = LJ->getPointerAddressSpace(); 598de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 59965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J); 60065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IPtr = SI->getPointerOperand(); 60165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JPtr = SJ->getPointerOperand(); 60265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAlignment = 
SI->getAlignment(); 60365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAlignment = SJ->getAlignment(); 60465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace = SI->getPointerAddressSpace(); 60565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAddressSpace = SJ->getPointerAddressSpace(); 606de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 607de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 60893f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel if (!ComputeOffset) 60993f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel return true; 61093f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel 611de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *IPtrSCEV = SE->getSCEV(IPtr); 612de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *JPtrSCEV = SE->getSCEV(JPtr); 613de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 614de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If this is a trivial offset, then we'll get something like 615de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // 1*sizeof(type). With target data, which we need anyway, this will get 616de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // constant folded into a number. 
617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV); 618de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (const SCEVConstant *ConstOffSCEV = 619de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dyn_cast<SCEVConstant>(OffsetSCEV)) { 620de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantInt *IntOff = ConstOffSCEV->getValue(); 621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t Offset = IntOff->getSExtValue(); 622de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VTy = cast<PointerType>(IPtr->getType())->getElementType(); 624de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy); 625de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 62664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType(); 62764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (VTy != VTy2 && Offset < 0) { 62864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2); 62964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel OffsetInElmts = Offset/VTy2TSS; 63064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return (abs64(Offset) % VTy2TSS) == 0; 63164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 632de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 633de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts = Offset/VTyTSS; 634de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (abs64(Offset) % VTyTSS) == 0; 635de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 636de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 638de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 639de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 640de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if the provided 
CallInst represents an intrinsic that can 641de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // be vectorized. 642de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isVectorizableIntrinsic(CallInst* I) { 643de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Function *F = I->getCalledFunction(); 644de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!F) return false; 645de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 646de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IID = F->getIntrinsicID(); 647de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!IID) return false; 648de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 649de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel switch(IID) { 650de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel default: 651de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 652de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::sqrt: 653de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::powi: 654de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::sin: 655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::cos: 656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log: 657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log2: 658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log10: 659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::exp: 660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::exp2: 661de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::pow: 66286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng return Config.VectorizeMath; 663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::fma: 66486312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng return Config.VectorizeFMA; 665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 
667de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if J is the second element in some pair referenced by 669de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // some multimap pair iterator pair. 670de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel template <typename V> 671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isSecondInIteratorPair(V J, std::pair< 672de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typename std::multimap<V, V>::iterator, 673de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typename std::multimap<V, V>::iterator> PairRange) { 674de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (typename std::multimap<V, V>::iterator K = PairRange.first; 675de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K != PairRange.second; ++K) 676de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (K->second == J) return true; 677de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 678de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 679de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 680b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel 681b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel bool isPureIEChain(InsertElementInst *IE) { 682b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel InsertElementInst *IENext = IE; 683b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel do { 684b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel if (!isa<UndefValue>(IENext->getOperand(0)) && 685b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel !isa<InsertElementInst>(IENext->getOperand(0))) { 686b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel return false; 687b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel } 688b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel } while ((IENext = 689b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel dyn_cast<InsertElementInst>(IENext->getOperand(0)))); 690b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel 
691b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel return true; 692b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel } 693de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel }; 694de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 695de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function implements one vectorization iteration on the provided 696de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // basic block. It returns true if the block is changed. 69764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) { 6985d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool ShouldContinue; 6995d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator Start = BB.getFirstInsertionPt(); 7005d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 7015d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::vector<Value *> AllPairableInsts; 7025d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseMap<Value *, Value *> AllChosenPairs; 703a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> AllFixedOrderPairs; 70472465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> AllPairConnectionTypes; 70572465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> AllConnectedPairs, AllConnectedPairDeps; 7065d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 7075d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel do { 7085d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::vector<Value *> PairableInsts; 7095d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::multimap<Value *, Value *> CandidatePairs; 710a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> FixedOrderPairs; 71165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> CandidatePairCostSavings; 7125d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, 
713a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrderPairs, 71465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CandidatePairCostSavings, 71564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel PairableInsts, NonPow2Len); 7165d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (PairableInsts.empty()) continue; 7173706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 7185d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Now we have a map of all of the pairable instructions and we need to 7195d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // select the best possible pairing. A good pairing is one such that the 7205d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // users of the pair are also paired. This defines a (directed) forest 72194c22716d60ff5edf6a98a3c67e0faa001be1142Sylvestre Ledru // over the pairs such that two pairs are connected iff the second pair 7225d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // uses the first. 7233706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 7245d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Note that it only matters that both members of the second pair use some 7255d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // element of the first pair (to allow for splatting). 
7263706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 72772465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> ConnectedPairs, ConnectedPairDeps; 72872465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> PairConnectionTypes; 72972465ea23d010507d3746adc126d719005981e05Hal Finkel computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs, 73072465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes); 7315d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ConnectedPairs.empty()) continue; 7323706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 73372465ea23d010507d3746adc126d719005981e05Hal Finkel for (std::multimap<ValuePair, ValuePair>::iterator 73472465ea23d010507d3746adc126d719005981e05Hal Finkel I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); 73572465ea23d010507d3746adc126d719005981e05Hal Finkel I != IE; ++I) { 73672465ea23d010507d3746adc126d719005981e05Hal Finkel ConnectedPairDeps.insert(VPPair(I->second, I->first)); 73772465ea23d010507d3746adc126d719005981e05Hal Finkel } 73872465ea23d010507d3746adc126d719005981e05Hal Finkel 7395d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Build the pairable-instruction dependency map 7405d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseSet<ValuePair> PairableInstUsers; 7415d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers); 7423706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 74335564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // There is now a graph of the connected pairs. For each variable, pick 74435564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // the pairing with the largest tree meeting the depth requirement on at 74535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // least one branch. 
Then select all pairings that are part of that tree 74635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // and remove them from the list of available pairings and pairable 74735564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // variables. 7483706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 7495d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseMap<Value *, Value *> ChosenPairs; 75065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel choosePairs(CandidatePairs, CandidatePairCostSavings, 75186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel PairableInsts, FixedOrderPairs, PairConnectionTypes, 75286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ConnectedPairs, ConnectedPairDeps, 7535d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel PairableInstUsers, ChosenPairs); 7543706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 7555d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ChosenPairs.empty()) continue; 7565d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(), 7575d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel PairableInsts.end()); 7585d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end()); 759a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel 76072465ea23d010507d3746adc126d719005981e05Hal Finkel // Only for the chosen pairs, propagate information on fixed-order pairs, 76172465ea23d010507d3746adc126d719005981e05Hal Finkel // pair connections, and their types to the data structures used by the 76272465ea23d010507d3746adc126d719005981e05Hal Finkel // pair fusion procedures. 
763a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel for (DenseMap<Value *, Value *>::iterator I = ChosenPairs.begin(), 764a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel IE = ChosenPairs.end(); I != IE; ++I) { 765a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel if (FixedOrderPairs.count(*I)) 766a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel AllFixedOrderPairs.insert(*I); 767a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel else if (FixedOrderPairs.count(ValuePair(I->second, I->first))) 768a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel AllFixedOrderPairs.insert(ValuePair(I->second, I->first)); 76972465ea23d010507d3746adc126d719005981e05Hal Finkel 77072465ea23d010507d3746adc126d719005981e05Hal Finkel for (DenseMap<Value *, Value *>::iterator J = ChosenPairs.begin(); 77172465ea23d010507d3746adc126d719005981e05Hal Finkel J != IE; ++J) { 77272465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned>::iterator K = 77372465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.find(VPPair(*I, *J)); 77472465ea23d010507d3746adc126d719005981e05Hal Finkel if (K != PairConnectionTypes.end()) { 77572465ea23d010507d3746adc126d719005981e05Hal Finkel AllPairConnectionTypes.insert(*K); 77672465ea23d010507d3746adc126d719005981e05Hal Finkel } else { 77772465ea23d010507d3746adc126d719005981e05Hal Finkel K = PairConnectionTypes.find(VPPair(*J, *I)); 77872465ea23d010507d3746adc126d719005981e05Hal Finkel if (K != PairConnectionTypes.end()) 77972465ea23d010507d3746adc126d719005981e05Hal Finkel AllPairConnectionTypes.insert(*K); 78072465ea23d010507d3746adc126d719005981e05Hal Finkel } 78172465ea23d010507d3746adc126d719005981e05Hal Finkel } 78272465ea23d010507d3746adc126d719005981e05Hal Finkel } 78372465ea23d010507d3746adc126d719005981e05Hal Finkel 78472465ea23d010507d3746adc126d719005981e05Hal Finkel for (std::multimap<ValuePair, ValuePair>::iterator 78572465ea23d010507d3746adc126d719005981e05Hal Finkel I = ConnectedPairs.begin(), IE = 
ConnectedPairs.end(); 78672465ea23d010507d3746adc126d719005981e05Hal Finkel I != IE; ++I) { 78772465ea23d010507d3746adc126d719005981e05Hal Finkel if (AllPairConnectionTypes.count(*I)) { 78872465ea23d010507d3746adc126d719005981e05Hal Finkel AllConnectedPairs.insert(*I); 78972465ea23d010507d3746adc126d719005981e05Hal Finkel AllConnectedPairDeps.insert(VPPair(I->second, I->first)); 79072465ea23d010507d3746adc126d719005981e05Hal Finkel } 791a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel } 7925d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } while (ShouldContinue); 7935d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 7945d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (AllChosenPairs.empty()) return false; 7955d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel NumFusedOps += AllChosenPairs.size(); 7963706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 797de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A set of pairs has now been selected. It is now necessary to replace the 798de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // paired instructions with vector instructions. For this procedure each 79943ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop // operand must be replaced with a vector operand. This vector is formed 800de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // by using build_vector on the old operands. The replaced values are then 801de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // replaced with a vector_extract on the result. Subsequent optimization 802de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // passes should coalesce the build/extract combinations. 
8033706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 80472465ea23d010507d3746adc126d719005981e05Hal Finkel fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs, 80572465ea23d010507d3746adc126d719005981e05Hal Finkel AllPairConnectionTypes, 80672465ea23d010507d3746adc126d719005981e05Hal Finkel AllConnectedPairs, AllConnectedPairDeps); 80764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 80864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // It is important to cleanup here so that future iterations of this 80964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // function have less work to do. 8108e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo()); 811de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 812de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 813de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 814de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function returns true if the provided instruction is capable of being 815de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // fused into a vector instruction. This determination is based only on the 816de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // type and other attributes of the instruction. 
817de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::isInstVectorizable(Instruction *I, 818de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool &IsSimpleLoadStore) { 819de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = false; 820de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 821de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (CallInst *C = dyn_cast<CallInst>(I)) { 822de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isVectorizableIntrinsic(C)) 823de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 824de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (LoadInst *L = dyn_cast<LoadInst>(I)) { 825de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Vectorize simple loads if possbile: 826de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = L->isSimple(); 82786312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!IsSimpleLoadStore || !Config.VectorizeMemOps) 828de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 829de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (StoreInst *S = dyn_cast<StoreInst>(I)) { 830de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Vectorize simple stores if possbile: 831de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = S->isSimple(); 83286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!IsSimpleLoadStore || !Config.VectorizeMemOps) 833de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 834de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (CastInst *C = dyn_cast<CastInst>(I)) { 835de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // We can vectorize casts, but not casts of pointer types, etc. 
83686312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!Config.VectorizeCasts) 837de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 838de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 839de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *SrcTy = C->getSrcTy(); 840f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!SrcTy->isSingleValueType()) 841de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 842de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 843de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *DestTy = C->getDestTy(); 844f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!DestTy->isSingleValueType()) 845de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 846fc3665c87519850f629c9565535e3be447e10addHal Finkel } else if (isa<SelectInst>(I)) { 847fc3665c87519850f629c9565535e3be447e10addHal Finkel if (!Config.VectorizeSelect) 848fc3665c87519850f629c9565535e3be447e10addHal Finkel return false; 849e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel } else if (isa<CmpInst>(I)) { 850e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel if (!Config.VectorizeCmp) 851e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel return false; 852f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) { 853f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!Config.VectorizeGEP) 854f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 855f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 856f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel // Currently, vector GEPs exist only with one index. 
857f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (G->getNumIndices() != 1) 858f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 859de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) || 860de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) { 861de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 862de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 863de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 864de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // We can't vectorize memory operations without target data 865de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (TD == 0 && IsSimpleLoadStore) 866de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 867de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 868de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *T1, *T2; 86964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(I, T1, T2); 870de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 871de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Not every type can be vectorized... 
872de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) || 873de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel !(VectorType::isValidElementType(T2) || T2->isVectorTy())) 874de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 875de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 87665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (T1->getScalarSizeInBits() == 1) { 877768edf3cd037aab10391abc279f71470df8e3156Hal Finkel if (!Config.VectorizeBools) 878768edf3cd037aab10391abc279f71470df8e3156Hal Finkel return false; 879768edf3cd037aab10391abc279f71470df8e3156Hal Finkel } else { 88065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!Config.VectorizeInts && T1->isIntOrIntVectorTy()) 881768edf3cd037aab10391abc279f71470df8e3156Hal Finkel return false; 882768edf3cd037aab10391abc279f71470df8e3156Hal Finkel } 88365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 88465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (T2->getScalarSizeInBits() == 1) { 88565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!Config.VectorizeBools) 88665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 88765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } else { 88865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!Config.VectorizeInts && T2->isIntOrIntVectorTy()) 88965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 89065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 89165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 89286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!Config.VectorizeFloats 89386312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) 894de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 895de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 896e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel // Don't vectorize target-specific types. 
897e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel if (T1->isX86_FP80Ty() || T1->isPPC_FP128Ty() || T1->isX86_MMXTy()) 898e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel return false; 899e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy()) 900e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel return false; 901e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel 90205bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel if ((!Config.VectorizePointers || TD == 0) && 90305bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel (T1->getScalarType()->isPointerTy() || 90405bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel T2->getScalarType()->isPointerTy())) 905f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 906f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 90765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!VTTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits || 90865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel T2->getPrimitiveSizeInBits() >= Config.VectorBits)) 909de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 910de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 911de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 912de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 913de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 914de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function returns true if the two provided instructions are compatible 915de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // (meaning that they can be fused into a vector instruction). This assumes 916de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that I has already been determined to be vectorizable and that J is not 917de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // in the use tree of I. 
918de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, 91965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel bool IsSimpleLoadStore, bool NonPow2Len, 920a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel int &CostSavings, int &FixedOrder) { 921de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << 922de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << "\n"); 923de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 92465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings = 0; 925a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrder = 0; 92665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 927de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Loads and stores can be merged if they have different alignments, 928de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but are otherwise the same. 92964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment | 93064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel (NonPow2Len ? 
Instruction::CompareUsingScalarTypes : 0))) 93164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return false; 93264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 93364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *IT1, *IT2, *JT1, *JT2; 93464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(I, IT1, IT2); 93564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(J, JT1, JT2); 93664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaxTypeBits = std::max( 93764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(), 93864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits()); 93965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!VTTI && MaxTypeBits > Config.VectorBits) 940de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 941ec4e85e3364f50802f2007e4b1e23661d4610366Hal Finkel 942de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: handle addsub-type operations! 
943de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 944de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsSimpleLoadStore) { 945de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *IPtr, *JPtr; 94665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; 947de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t OffsetInElmts = 0; 948de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, 94965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace, JAddressSpace, 950de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts) && abs64(OffsetInElmts) == 1) { 951a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrder = (int) OffsetInElmts; 95265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned BottomAlignment = IAlignment; 95365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (OffsetInElmts < 0) BottomAlignment = JAlignment; 95465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 95565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *aTypeI = isa<StoreInst>(I) ? 95665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel cast<StoreInst>(I)->getValueOperand()->getType() : I->getType(); 95765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *aTypeJ = isa<StoreInst>(J) ? 
95865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel cast<StoreInst>(J)->getValueOperand()->getType() : J->getType(); 95965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *VType = getVecTypeForPair(aTypeI, aTypeJ); 96064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 96165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (Config.AlignedOnly) { 962de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // An aligned load or store is possible only if the instruction 963de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // with the lower offset has an alignment suitable for the 964de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vector type. 9651230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 966de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned VecAlignment = TD->getPrefTypeAlignment(VType); 967de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (BottomAlignment < VecAlignment) 968de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 969de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 97065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 97165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (VTTI) { 97265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned ICost = VTTI->getMemoryOpCost(I->getOpcode(), I->getType(), 97365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAlignment, IAddressSpace); 97465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned JCost = VTTI->getMemoryOpCost(J->getOpcode(), J->getType(), 97565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAlignment, JAddressSpace); 97665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned VCost = VTTI->getMemoryOpCost(I->getOpcode(), VType, 97765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel BottomAlignment, 97865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace); 97965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (VCost > ICost + JCost) 98065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 
98182149a9106f221aa6a7271977c236b078e621f21Hal Finkel 982dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // We don't want to fuse to a type that will be split, even 98382149a9106f221aa6a7271977c236b078e621f21Hal Finkel // if the two input types will also be split and there is no other 984dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // associated cost. 985dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel unsigned VParts = VTTI->getNumberOfParts(VType); 986dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel if (VParts > 1) 987dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel return false; 988dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel else if (!VParts && VCost == ICost + JCost) 98982149a9106f221aa6a7271977c236b078e621f21Hal Finkel return false; 99082149a9106f221aa6a7271977c236b078e621f21Hal Finkel 99165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings = ICost + JCost - VCost; 99265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 993de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 994de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 995de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 99665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } else if (VTTI) { 99746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2); 99846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2); 99965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *VT1 = getVecTypeForPair(IT1, JT1), 100065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel *VT2 = getVecTypeForPair(IT2, JT2); 100146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2); 100265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 100365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (VCost > ICost + JCost) 100465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 
100582149a9106f221aa6a7271977c236b078e621f21Hal Finkel 1006dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // We don't want to fuse to a type that will be split, even 100782149a9106f221aa6a7271977c236b078e621f21Hal Finkel // if the two input types will also be split and there is no other 1008dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // associated cost. 10098b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel unsigned VParts1 = VTTI->getNumberOfParts(VT1), 10108b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel VParts2 = VTTI->getNumberOfParts(VT2); 10118b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel if (VParts1 > 1 || VParts2 > 1) 1012dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel return false; 10138b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel else if ((!VParts1 || !VParts2) && VCost == ICost + JCost) 101482149a9106f221aa6a7271977c236b078e621f21Hal Finkel return false; 101582149a9106f221aa6a7271977c236b078e621f21Hal Finkel 101665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings = ICost + JCost - VCost; 1017de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1018de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 10196173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // The powi intrinsic is special because only the first argument is 10206173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // vectorized, the second arguments must be equal. 
10216173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel CallInst *CI = dyn_cast<CallInst>(I); 10226173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Function *FI; 10236173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel if (CI && (FI = CI->getCalledFunction()) && 10246173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel FI->getIntrinsicID() == Intrinsic::powi) { 10256173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel 10266173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Value *A1I = CI->getArgOperand(1), 10276173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel *A1J = cast<CallInst>(J)->getArgOperand(1); 10286173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel const SCEV *A1ISCEV = SE->getSCEV(A1I), 10296173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel *A1JSCEV = SE->getSCEV(A1J); 10306173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel return (A1ISCEV == A1JSCEV); 10316173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } 10326173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel 1033de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 1034de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1035de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1036de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Figure out whether or not J uses I and update the users and write-set 1037de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // structures associated with I. Specifically, Users represents the set of 1038de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instructions that depend on I. WriteSet represents the set 1039de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // of memory locations that are dependent on I. If UpdateUsers is true, 1040de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // and J uses I, then Users is updated to contain J and WriteSet is updated 1041de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to contain any memory locations to which J writes. The function returns 1042de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // true if J uses I. 
By default, alias analysis is used to determine 1043de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // whether J reads from memory that overlaps with a location in WriteSet. 1044de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If LoadMoveSet is not null, then it is a previously-computed multimap 1045de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // where the key is the memory-based user instruction and the value is 1046de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the instruction to be compared with I. So, if LoadMoveSet is provided, 1047de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // then the alias analysis is not used. This is necessary because this 1048de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // function is called during the process of moving instructions during 1049de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vectorization and the results of the alias analysis are not stable during 1050de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that process. 1051de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users, 1052de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker &WriteSet, Instruction *I, 1053de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, bool UpdateUsers, 1054de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> *LoadMoveSet) { 1055de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UsesI = false; 1056de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1057de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This instruction may already be marked as a user due, for example, to 1058de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // being a member of a selected pair. 
1059de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (Users.count(J)) 1060de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = true; 1061de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1062de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!UsesI) 10637e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel for (User::op_iterator JU = J->op_begin(), JE = J->op_end(); 10647e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel JU != JE; ++JU) { 1065de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *V = *JU; 1066de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I == V || Users.count(V)) { 1067de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = true; 1068de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1069de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1070de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1071de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!UsesI && J->mayReadFromMemory()) { 1072de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (LoadMoveSet) { 1073de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair JPairRange = LoadMoveSet->equal_range(J); 1074de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = isSecondInIteratorPair<Value*>(I, JPairRange); 1075de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1076de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (AliasSetTracker::iterator W = WriteSet.begin(), 1077de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel WE = WriteSet.end(); W != WE; ++W) { 107838a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel if (W->aliasesUnknownInst(J, *AA)) { 107938a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel UsesI = true; 108038a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel break; 1081de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1082de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1083de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1084de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 
1085de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1086de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI && UpdateUsers) { 1087de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (J->mayWriteToMemory()) WriteSet.add(J); 1088de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Users.insert(J); 1089de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1090de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1091de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return UsesI; 1092de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1093de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1094de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function iterates over all instruction pairs in the provided 1095de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // basic block and collects all candidate pairs for vectorization. 10965d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool BBVectorize::getCandidatePairs(BasicBlock &BB, 10975d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator &Start, 1098de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1099a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 110065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 110164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Value *> &PairableInsts, bool NonPow2Len) { 1102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock::iterator E = BB.end(); 11035d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (Start == E) return false; 11045d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 11055d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool ShouldContinue = false, IAfterStart = false; 11065d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel for (BasicBlock::iterator I = Start++; I != E; ++I) { 11075d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (I == Start) IAfterStart = true; 
11085d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool IsSimpleLoadStore; 1110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isInstVectorizable(I, IsSimpleLoadStore)) continue; 1111de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1112de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for an instruction with which to pair instruction *I... 1113de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 1114de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 11155d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool JAfterStart = IAfterStart; 11165d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator J = llvm::next(I); 1117bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) { 11185d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (J == Start) JAfterStart = true; 11195d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Determine if J uses I, if so, exit the loop. 1121bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep); 1122bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.FastDep) { 1123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: For this heuristic to be effective, independent operations 1124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // must tend to be intermixed. This is likely to be true from some 1125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // kinds of grouped loop unrolling (but not the generic LLVM pass), 1126de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but otherwise may require some kind of reordering pass. 
1127de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1128de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // When using fast dependency analysis, 1129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // stop searching after first use: 1130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI) break; 1131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI) continue; 1133de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1134de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1135de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // J does not use I, and comes before the first use of I, so it can be 1136de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // merged with I if the instructions are compatible. 1137a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel int CostSavings, FixedOrder; 113865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len, 1139a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel CostSavings, FixedOrder)) continue; 1140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // J is a candidate for merging with I. 
1142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!PairableInsts.size() || 1143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInsts[PairableInsts.size()-1] != I) { 1144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInsts.push_back(I); 1145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 11465d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1147de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.insert(ValuePair(I, J)); 114865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (VTTI) 114965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), 115065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings)); 11515d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1152a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel if (FixedOrder == 1) 1153a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrderPairs.insert(ValuePair(I, J)); 1154a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel else if (FixedOrder == -1) 1155a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrderPairs.insert(ValuePair(J, I)); 1156a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel 11575d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // The next call to this function must start after the last instruction 11585d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // selected during this invocation. 
11595d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (JAfterStart) { 11605d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel Start = llvm::next(J); 11615d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel IAfterStart = JAfterStart = false; 11625d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } 11635d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1164de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair " 116565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel << *I << " <-> " << *J << " (cost savings: " << 116665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings << ")\n"); 11675d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 11685d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // If we have already found too many pairs, break here and this function 11695d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // will be called again starting after the last instruction selected 11705d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // during this invocation. 
1171bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (PairableInsts.size() >= Config.MaxInsts) { 11725d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel ShouldContinue = true; 11735d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel break; 11745d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } 1175de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 11765d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 11775d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ShouldContinue) 11785d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel break; 1179de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1180de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1181de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: found " << PairableInsts.size() 1182de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << " instructions with candidate pairs\n"); 11835d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 11845d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel return ShouldContinue; 1185de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Finds candidate pairs connected to the pair P = <PI, PJ>. This means that 1188de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // it looks for pairs such that both members have an input which is an 1189de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // output of PI or PJ. 
1190de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::computePairsConnectedTo( 1191de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1192de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 119472465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 1195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePair P) { 1196bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel StoreInst *SI, *SJ; 1197bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For each possible pairing for this variable, look at the uses of 1199de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the first value... 1200de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator I = P.first->use_begin(), 1201de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = P.first->use_end(); I != E; ++I) { 1202bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if (isa<LoadInst>(*I)) { 1203bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // A pair cannot be connected to a load because the load only takes one 1204bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // operand (the address) and it is a scalar even after vectorization. 1205bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1206bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel } else if ((SI = dyn_cast<StoreInst>(*I)) && 1207bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.first == SI->getPointerOperand()) { 1208bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // Similarly, a pair cannot be connected to a store through its 1209bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // pointer operand. 
1210bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1211bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel } 1212bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); 1214de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1215de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For each use of the first variable, look for uses of the second 1216de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // variable... 1217de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.second->use_begin(), 1218de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = P.second->use_end(); J != E2; ++J) { 1219bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 1220bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SJ->getPointerOperand()) 1221bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1222bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1223de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair JPairRange = CandidatePairs.equal_range(*J); 1224de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1225de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for <I, J>: 122672465ea23d010507d3746adc126d719005981e05Hal Finkel if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { 122772465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*I, *J)); 122872465ea23d010507d3746adc126d719005981e05Hal Finkel ConnectedPairs.insert(VP); 122972465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect)); 123072465ea23d010507d3746adc126d719005981e05Hal Finkel } 1231de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1232de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for <J, I>: 123372465ea23d010507d3746adc126d719005981e05Hal Finkel if (isSecondInIteratorPair<Value*>(*I, JPairRange)) { 
123472465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*J, *I)); 123572465ea23d010507d3746adc126d719005981e05Hal Finkel ConnectedPairs.insert(VP); 123672465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap)); 123772465ea23d010507d3746adc126d719005981e05Hal Finkel } 1238de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1239de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1240bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.SplatBreaksChain) continue; 1241de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for cases where just the first value in the pair is used by 1242de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both members of another pair (splatting). 1243de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) { 1244bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 1245bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.first == SJ->getPointerOperand()) 1246bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1247bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 124872465ea23d010507d3746adc126d719005981e05Hal Finkel if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { 124972465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*I, *J)); 125072465ea23d010507d3746adc126d719005981e05Hal Finkel ConnectedPairs.insert(VP); 125172465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); 125272465ea23d010507d3746adc126d719005981e05Hal Finkel } 1253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1254de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1255de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1256bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.SplatBreaksChain) return; 1257de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for 
cases where just the second value in the pair is used by 1258de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both members of another pair (splatting). 1259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator I = P.second->use_begin(), 1260de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = P.second->use_end(); I != E; ++I) { 1261bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if (isa<LoadInst>(*I)) 1262bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1263bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel else if ((SI = dyn_cast<StoreInst>(*I)) && 1264bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SI->getPointerOperand()) 1265bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1266bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1267de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); 1268de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1269de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) { 1270bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 1271bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SJ->getPointerOperand()) 1272bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1273bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 127472465ea23d010507d3746adc126d719005981e05Hal Finkel if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { 127572465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*I, *J)); 127672465ea23d010507d3746adc126d719005981e05Hal Finkel ConnectedPairs.insert(VP); 127772465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); 127872465ea23d010507d3746adc126d719005981e05Hal Finkel } 1279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 
1281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1282de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1283de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function figures out which pairs are connected. Two pairs are 1284de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // connected if some output of the first pair forms an input to both members 1285de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // of the second pair. 1286de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::computeConnectedPairs( 1287de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 128972465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 129072465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes) { 1291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1292de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator PI = PairableInsts.begin(), 1293de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PE = PairableInsts.end(); PI != PE; ++PI) { 1294de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI); 1295de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1296de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator P = choiceRange.first; 1297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel P != choiceRange.second; ++P) 1298de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel computePairsConnectedTo(CandidatePairs, PairableInsts, 129972465ea23d010507d3746adc126d719005981e05Hal Finkel ConnectedPairs, PairConnectionTypes, *P); 1300de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1301de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << 
"BBV: found " << ConnectedPairs.size() 1303de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << " pair connections.\n"); 1304de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1305de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1306de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function builds a set of use tuples such that <A, B> is in the set 1307de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // if B is in the use tree of A. If B is in the use tree of A, then B 1308de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // depends on the output of A. 1309de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::buildDepMap( 1310de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock &BB, 1311de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1312de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers) { 1314de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> IsInPair; 1315de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(), 1316de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = CandidatePairs.end(); C != E; ++C) { 1317de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsInPair.insert(C->first); 1318de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsInPair.insert(C->second); 1319de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1320de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1321de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate through the basic block, recording all Users of each 1322de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pairable instruction. 
1323de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1324de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock::iterator E = BB.end(); 1325de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { 1326de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsInPair.find(I) == IsInPair.end()) continue; 1327de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1328de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 1329de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 1330de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) 1331de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) trackUsesOfI(Users, WriteSet, I, J); 1332de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1333de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end(); 1334de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel U != E; ++U) 1335de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.insert(ValuePair(I, *U)); 1336de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1337de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1338de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1339de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if an input to pair P is an output of pair Q and also an 1340de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // input of pair Q is an output of pair P. If this is the case, then these 1341de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // two pairs cannot be simultaneously fused. 
1342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q, 1343de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1344de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> *PairableInstUserMap) { 1345de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Two pairs are in conflict if they are mutual Users of eachother. 1346de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) || 1347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.first, Q.second)) || 1348de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.second, Q.first)) || 1349de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.second, Q.second)); 1350de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) || 1351de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.first, P.second)) || 1352de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.second, P.first)) || 1353de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.second, P.second)); 1354de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (PairableInstUserMap) { 1355de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: The expensive part of the cycle check is not so much the cycle 1356de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // check itself but this edge insertion procedure. This needs some 1357de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // profiling and probably a different data structure (same is true of 1358de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // most uses of std::multimap). 
1359de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (PUsesQ) { 1360de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair QPairRange = PairableInstUserMap->equal_range(Q); 1361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isSecondInIteratorPair(P, QPairRange)) 1362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUserMap->insert(VPPair(Q, P)); 1363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1364de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (QUsesP) { 1365de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair PPairRange = PairableInstUserMap->equal_range(P); 1366de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isSecondInIteratorPair(Q, PPairRange)) 1367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUserMap->insert(VPPair(P, Q)); 1368de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1369de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1370de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1371de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (QUsesP && PUsesQ); 1372de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function walks the use graph of current pairs to see if, starting 1375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // from P, the walk returns to P. 
1376de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::pairWillFormCycle(ValuePair P, 1377de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 1378de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &CurrentPairs) { 1379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCycleCheck) 1380de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> " 1381de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *P.second << "\n"); 1382de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A lookup table of visisted pairs is kept because the PairableInstUserMap 1383de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // contains non-direct associations. 1384de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> Visited; 138535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePair, 32> Q; 1386de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(P); 138835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 138935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel ValuePair QTop = Q.pop_back_val(); 1390de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Visited.insert(QTop); 1391de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1392de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCycleCheck) 1393de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> " 1394de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *QTop.second << "\n"); 1395de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop); 1396de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first; 
1397de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C != QPairRange.second; ++C) { 1398de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C->second == P) { 1399de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() 1400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << "BBV: rejected to prevent non-trivial cycle formation: " 1401de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *C->first.first << " <-> " << *C->first.second << "\n"); 1402de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 1403de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1404de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 14050b2500c504156c45cd71817a9ef6749b6cde5703David Blaikie if (CurrentPairs.count(C->second) && !Visited.count(C->second)) 1406de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(C->second); 1407de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 140835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1409de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1410de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 1411de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1412de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1413de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function builds the initial tree of connected pairs with the 1414de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair J at the root. 
1415de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::buildInitialTreeFor( 1416de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1417de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1418de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 1419de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1420de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1421de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, ValuePair J) { 1422de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Each of these pairs is viewed as the root node of a Tree. The Tree 1423de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is then walked (depth-first). As this happens, we keep track of 1424de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the pairs that compose the Tree and the maximum depth of the Tree. 
142535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePairWithDepth, 32> Q; 1426de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1427de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); 142835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 1429de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePairWithDepth QTop = Q.back(); 1430de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1431de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Push each child onto the queue: 1432de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool MoreChildren = false; 1433de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t MaxChildDepth = QTop.second; 1434de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first); 1435478eed85f96f0d93da43e26cfb7fc6dee981c9aaNAKAMURA Takumi for (std::multimap<ValuePair, ValuePair>::iterator k = qtRange.first; 1436de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel k != qtRange.second; ++k) { 1437de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Make sure that this child pair is still a candidate: 1438de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool IsStillCand = false; 1439de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair checkRange = 1440de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.equal_range(k->second.first); 1441de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator m = checkRange.first; 1442de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel m != checkRange.second; ++m) { 1443de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (m->second == k->second.second) { 1444de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsStillCand = true; 1445de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1446de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 
1447de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1448de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1449de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsStillCand) { 1450de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t>::iterator C = Tree.find(k->second); 1451de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C == Tree.end()) { 1452de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t d = getDepthFactor(k->second.first); 1453de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(k->second, QTop.second+d)); 1454de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MoreChildren = true; 1455de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1456de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxChildDepth = std::max(MaxChildDepth, C->second); 1457de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1458de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1459de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1460de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1461de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!MoreChildren) { 1462de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Record the current pair as part of the Tree: 1463de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth)); 1464de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.pop_back(); 1465de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 146635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1467de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1468de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1469de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Given some initial tree, prune it by removing conflicting pairs (pairs 1470de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that cannot be simultaneously chosen for vectorization). 
1471de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::pruneTreeFor( 1472de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1473de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1474de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 1475de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1476de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 1477de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1478de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, 1479de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PrunedTree, ValuePair J, 1480de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck) { 148135564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePairWithDepth, 32> Q; 1482de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1483de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); 148435564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 148535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel ValuePairWithDepth QTop = Q.pop_back_val(); 1486de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PrunedTree.insert(QTop.first); 1487de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1488de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Visit each child, pruning as necessary... 
148943ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop DenseMap<ValuePair, size_t> BestChildren; 1490de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first); 1491478eed85f96f0d93da43e26cfb7fc6dee981c9aaNAKAMURA Takumi for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first; 1492de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K != QTopRange.second; ++K) { 1493de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t>::iterator C = Tree.find(K->second); 1494de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C == Tree.end()) continue; 1495de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1496de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This child is in the Tree, now we need to make sure it is the 1497de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // best of any conflicting children. There could be multiple 1498de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflicting children, so first, determine if we're keeping 1499de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // this child, then delete conflicting children as necessary. 1500de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1501de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // It is also necessary to guard against pairing-induced 1502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // dependencies. Consider instructions a .. x .. y .. b 1503de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // such that (a,b) are to be fused and (x,y) are to be fused 1504de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but a is an input to x and b is an output from y. This 1505de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // means that y cannot be moved after b but x must be moved 1506de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // after b for (a,b) to be fused. In other words, after 1507de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // fusing (a,b) we have y .. a/b .. 
x where y is an input 1508de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to a/b and x is an output to a/b: x and y can no longer 1509de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // be legally fused. To prevent this condition, we must 1510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // make sure that a child pair added to the Tree is not 1511de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both an input and output of an already-selected pair. 1512de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1513de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Pairing-induced dependencies can also form from more complicated 1514de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // cycles. The pair vs. pair conflicts are easy to check, and so 1515de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that is done explicitly for "fast rejection", and because for 1516de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // child vs. child conflicts, we may prefer to keep the current 1517de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair in preference to the already-selected child. 
1518de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> CurrentPairs; 1519de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1520de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool CanAdd = true; 1521de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<ValuePair, size_t>::iterator C2 152243ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(), E2 = BestChildren.end(); 1523de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2 != E2; ++C2) { 1524de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1525de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1526de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1527de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1528de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers, 1529de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1530de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->second >= C->second) { 1531de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1532de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1533de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1534de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1535de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(C2->first); 1536de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1537de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1538de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1539de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1540de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Even worse, this child could conflict with another node already 1541de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // selected for the Tree. If that is the case, ignore this child. 
1542de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<ValuePair>::iterator T = PrunedTree.begin(), 1543de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = PrunedTree.end(); T != E2; ++T) { 1544de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (T->first == C->first.first || 1545de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->first == C->first.second || 1546de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->second == C->first.first || 1547de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->second == C->first.second || 1548de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(*T, C->first, PairableInstUsers, 1549de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1550de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1551de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1552de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1553de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1554de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(*T); 1555de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1556de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1557de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1558de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // And check the queue too... 
155935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel for (SmallVector<ValuePairWithDepth, 32>::iterator C2 = Q.begin(), 1560de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = Q.end(); C2 != E2; ++C2) { 1561de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1562de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1563de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1564de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1565de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers, 1566de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? &PairableInstUserMap : 0)) { 1567de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1568de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1569de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1570de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1571de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(C2->first); 1572de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1573de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1574de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1575de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Last but not least, check for a conflict with any of the 1576de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // already-chosen pairs. 1577de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator C2 = 1578de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.begin(), E2 = ChosenPairs.end(); 1579de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2 != E2; ++C2) { 1580de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (pairsConflict(*C2, C->first, PairableInstUsers, 1581de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? 
&PairableInstUserMap : 0)) { 1582de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1583de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1584de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1585de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(*C2); 1587de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1588de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1589de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 15901230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // To check for non-trivial cycles formed by the addition of the 15911230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // current pair we've formed a list of all relevant pairs, now use a 15921230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // graph walk to check for a cycle. We start from the current pair and 15931230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // walk the use tree to see if we again reach the current pair. If we 15941230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // do, then the current pair is rejected. 1595de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1596de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: It may be more efficient to use a topological-ordering 1597de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // algorithm to improve the cycle check. This should be investigated. 1598de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UseCycleCheck && 1599de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs)) 1600de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1601de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1602de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This child can be added, but we may have chosen it in preference 1603de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to an already-selected child. 
Check for this here, and if a 1604de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflict is found, then remove the previously-selected child 1605de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // before adding this one in its place. 1606de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<ValuePair, size_t>::iterator C2 160743ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(); C2 != BestChildren.end();) { 1608de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1609de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1610de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1611de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1612de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers)) 161343ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop BestChildren.erase(C2++); 1614de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 1615de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++C2; 1616de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 161843ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop BestChildren.insert(ValuePairWithDepth(C->first, C->second)); 1619de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1620de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<ValuePair, size_t>::iterator C 162243ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(), E2 = BestChildren.end(); 1623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C != E2; ++C) { 1624de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t DepthF = getDepthFactor(C->first.first); 1625de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(C->first, QTop.second+DepthF)); 
1626de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 162735564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1628de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1629de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1630de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function finds the best tree of mututally-compatible connected 1631de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pairs, given the choice of root pairs as an iterator range. 1632de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::findBestTreeFor( 1633de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 163465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 1635de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 163686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 163786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 1638de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 163986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, 1640de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1641de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 1642de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1643de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, 164465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel int &BestEffSize, VPIteratorPair ChoiceRange, 1645de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck) { 1646de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator J = 
ChoiceRange.first; 1647de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel J != ChoiceRange.second; ++J) { 1648de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1649de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Before going any further, make sure that this pair does not 1650de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflict with any already-selected pairs (see comment below 1651de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // near the Tree pruning for more details). 1652de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> ChosenPairSet; 1653de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool DoesConflict = false; 1654de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(), 1655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = ChosenPairs.end(); C != E; ++C) { 1656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (pairsConflict(*C, *J, PairableInstUsers, 1657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck ? 
&PairableInstUserMap : 0)) { 1658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DoesConflict = true; 1659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1661de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1662de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairSet.insert(*C); 1663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (DoesConflict) continue; 1665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UseCycleCheck && 1667de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairWillFormCycle(*J, PairableInstUserMap, ChosenPairSet)) 1668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1669de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1670de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> Tree; 1671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel buildInitialTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, 1672de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers, ChosenPairs, Tree, *J); 1673de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1674de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Because we'll keep the child with the largest depth, the largest 1675de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // depth is still the same in the unpruned Tree. 
1676de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t MaxDepth = Tree.lookup(*J); 1677de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1678de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {" 1679de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *J->first << " <-> " << *J->second << "} of depth " << 1680de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxDepth << " and size " << Tree.size() << "\n"); 1681de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1682de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // At this point the Tree has been constructed, but, may contain 1683de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // contradictory children (meaning that different children of 1684de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // some tree node may be attempting to fuse the same instruction). 1685de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // So now we walk the tree again, in the case of a conflict, 1686de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // keep only the child with the largest depth. To break a tie, 1687de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // favor the first child. 
1688de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1689de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> PrunedTree; 1690de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, 1691de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree, 1692de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PrunedTree, *J, UseCycleCheck); 1693de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 169465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel int EffSize = 0; 169565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (VTTI) { 169678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DenseSet<Value *> PrunedTreeInstrs; 169778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), 169878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel E = PrunedTree.end(); S != E; ++S) { 169978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel PrunedTreeInstrs.insert(S->first); 170078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel PrunedTreeInstrs.insert(S->second); 170178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 170278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 170378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // The set of pairs that have already contributed to the total cost. 170478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DenseSet<ValuePair> IncomingPairs; 170578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 17064387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel // If the cost model were perfect, this might not be necessary; but we 17074387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel // need to make sure that we don't get stuck vectorizing our own 17084387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel // shuffle chains. 
17094387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel bool HasNontrivialInsts = false; 17104387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel 171186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // The node weights represent the cost savings associated with 171286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // fusing the pair of instructions. 171365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), 171465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel E = PrunedTree.end(); S != E; ++S) { 17154387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel if (!isa<ShuffleVectorInst>(S->first) && 17164387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel !isa<InsertElementInst>(S->first) && 17174387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel !isa<ExtractElementInst>(S->first)) 17184387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel HasNontrivialInsts = true; 17194387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel 172078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel bool FlipOrder = false; 172178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 172278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (getDepthFactor(S->first)) { 172378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib = CandidatePairCostSavings.find(*S)->second; 172478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tweight {" 172578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel << *S->first << " <-> " << *S->second << "} = " << 172678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib << "\n"); 172778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize += ESContrib; 172878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 172986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel 173078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // The edge weights contribute in a negative sense: they represent 173178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // the cost of shuffles. 
173286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel VPPIteratorPair IP = ConnectedPairDeps.equal_range(*S); 173386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel if (IP.first != ConnectedPairDeps.end()) { 173486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel unsigned NumDepsDirect = 0, NumDepsSwap = 0; 173586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; 173686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel Q != IP.second; ++Q) { 173778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (!PrunedTree.count(Q->second)) 173878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 173986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned>::iterator R = 174086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel PairConnectionTypes.find(VPPair(Q->second, Q->first)); 174186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel assert(R != PairConnectionTypes.end() && 174286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel "Cannot find pair connection type"); 174386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel if (R->second == PairConnectionDirect) 174486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ++NumDepsDirect; 174586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel else if (R->second == PairConnectionSwap) 174686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ++NumDepsSwap; 174786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel } 174886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel 174986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // If there are more swaps than direct connections, then 175086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // the pair order will be flipped during fusion. So the real 175186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // number of swaps is the minimum number. 
175278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel FlipOrder = !FixedOrderPairs.count(*S) && 175386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ((NumDepsSwap > NumDepsDirect) || 175486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel FixedOrderPairs.count(ValuePair(S->second, S->first))); 175586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel 175686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; 175786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel Q != IP.second; ++Q) { 175878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (!PrunedTree.count(Q->second)) 175978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 176086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned>::iterator R = 176186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel PairConnectionTypes.find(VPPair(Q->second, Q->first)); 176286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel assert(R != PairConnectionTypes.end() && 176386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel "Cannot find pair connection type"); 176486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel Type *Ty1 = Q->second.first->getType(), 176586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel *Ty2 = Q->second.second->getType(); 176686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel Type *VTy = getVecTypeForPair(Ty1, Ty2); 176786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel if ((R->second == PairConnectionDirect && FlipOrder) || 176886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel (R->second == PairConnectionSwap && !FlipOrder) || 176978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel R->second == PairConnectionSplat) { 177078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 177178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, VTy); 177278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << 
177378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *Q->second.first << " <-> " << *Q->second.second << 177478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel "} -> {" << 177578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S->first << " <-> " << *S->second << "} = " << 177678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib << "\n"); 177778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 177878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 177978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 178078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 178178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 178278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Compute the cost of outgoing edges. We assume that edges outgoing 178378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // to shuffles, inserts or extracts can be merged, and so contribute 178478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // no additional cost. 178578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (!S->first->getType()->isVoidTy()) { 178678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *Ty1 = S->first->getType(), 178778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *Ty2 = S->second->getType(); 178878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *VTy = getVecTypeForPair(Ty1, Ty2); 178978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 179078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel bool NeedsExtraction = false; 179178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel for (Value::use_iterator I = S->first->use_begin(), 179278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel IE = S->first->use_end(); I != IE; ++I) { 179386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) { 179486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // Shuffle can be folded if it has no other input 179586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if 
(isa<UndefValue>(SI->getOperand(1))) 179686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 179786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 179886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (isa<ExtractElementInst>(*I)) 179978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 180078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (PrunedTreeInstrs.count(*I)) 180178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 180278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel NeedsExtraction = true; 180378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel break; 180478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 180578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 180678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (NeedsExtraction) { 180778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib; 180878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (Ty1->isVectorTy()) 180978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 181078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Ty1, VTy); 181178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel else 181278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) VTTI->getVectorInstrCost( 181378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::ExtractElement, VTy, 0); 181478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 181578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << 181678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S->first << "} = " << ESContrib << "\n"); 181778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 181878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 181978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 182078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel NeedsExtraction = false; 182178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel for (Value::use_iterator I = 
S->second->use_begin(), 182278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel IE = S->second->use_end(); I != IE; ++I) { 182386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) { 182486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // Shuffle can be folded if it has no other input 182586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (isa<UndefValue>(SI->getOperand(1))) 182686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 182786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 182886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (isa<ExtractElementInst>(*I)) 182978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 183078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (PrunedTreeInstrs.count(*I)) 183178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 183278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel NeedsExtraction = true; 183378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel break; 183478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 183578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 183678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (NeedsExtraction) { 183778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib; 183878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (Ty2->isVectorTy()) 183978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 184078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Ty2, VTy); 184178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel else 184278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) VTTI->getVectorInstrCost( 184378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::ExtractElement, VTy, 1); 184478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << 184578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S->second << "} = " << ESContrib << "\n"); 
184678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 184778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 184878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 184978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 185078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Compute the cost of incoming edges. 185178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) { 185278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction *S1 = cast<Instruction>(S->first), 185378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S2 = cast<Instruction>(S->second); 185478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel for (unsigned o = 0; o < S1->getNumOperands(); ++o) { 185578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o); 185678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 185778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Combining constants into vector constants (or small vector 185878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // constants into larger ones are assumed free). 185978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (isa<Constant>(O1) && isa<Constant>(O2)) 186078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 186178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 186278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (FlipOrder) 186378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel std::swap(O1, O2); 186478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 186578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ValuePair VP = ValuePair(O1, O2); 186678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ValuePair VPR = ValuePair(O2, O1); 186778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 186878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Internal edges are not handled here. 
186978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (PrunedTree.count(VP) || PrunedTree.count(VPR)) 187078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 187178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 187278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *Ty1 = O1->getType(), 187378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *Ty2 = O2->getType(); 187478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *VTy = getVecTypeForPair(Ty1, Ty2); 187578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 187678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Combining vector operations of the same type is also assumed 187778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // folded with other operations. 187886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (Ty1 == Ty2) { 187986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // If both are insert elements, then both can be widened. 1880b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1), 1881b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel *IEO2 = dyn_cast<InsertElementInst>(O2); 1882b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2)) 188386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 188486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // If both are extract elements, and both have the same input 188586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // type, then they can be replaced with a shuffle 188686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1), 188786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel *EIO2 = dyn_cast<ExtractElementInst>(O2); 188886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (EIO1 && EIO2 && 188986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel EIO1->getOperand(0)->getType() == 189086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel 
EIO2->getOperand(0)->getType()) 189186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 189286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // If both are a shuffle with equal operand types and only two 189386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // unqiue operands, then they can be replaced with a single 189486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // shuffle 189586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1), 189686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel *SIO2 = dyn_cast<ShuffleVectorInst>(O2); 189786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (SIO1 && SIO2 && 189886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIO1->getOperand(0)->getType() == 189986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIO2->getOperand(0)->getType()) { 190086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SmallSet<Value *, 4> SIOps; 190186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO1->getOperand(0)); 190286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO1->getOperand(1)); 190386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO2->getOperand(0)); 190486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO2->getOperand(1)); 190586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (SIOps.size() <= 2) 190686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 190786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 190886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 190978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 191078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib; 191178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // This pair has already been formed. 
191278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (IncomingPairs.count(VP)) { 191378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 191478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (IncomingPairs.count(VPR)) { 191578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 191678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, VTy); 191778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) { 191878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) VTTI->getVectorInstrCost( 191978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, VTy, 0); 192078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib += (int) VTTI->getVectorInstrCost( 192178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, VTy, 1); 192278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (!Ty1->isVectorTy()) { 192378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // O1 needs to be inserted into a vector of size O2, and then 192478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // both need to be shuffled together. 192578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) VTTI->getVectorInstrCost( 192678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, Ty2, 0); 192778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib += (int) getInstrCost(Instruction::ShuffleVector, 192878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, Ty2); 192978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (!Ty2->isVectorTy()) { 193078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // O2 needs to be inserted into a vector of size O1, and then 193178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // both need to be shuffled together. 
193278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) VTTI->getVectorInstrCost( 193378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, Ty1, 0); 193478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib += (int) getInstrCost(Instruction::ShuffleVector, 193578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, Ty1); 193678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else { 193778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *TyBig = Ty1, *TySmall = Ty2; 193878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements()) 193978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel std::swap(TyBig, TySmall); 194078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 194178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 194278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, TyBig); 194378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (TyBig != TySmall) 194478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib += (int) getInstrCost(Instruction::ShuffleVector, 194578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel TyBig, TySmall); 194678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 194778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 194878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" 194978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel << *O1 << " <-> " << *O2 << "} = " << 195078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib << "\n"); 195178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 195278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel IncomingPairs.insert(VP); 195386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel } 195486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel } 195565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 19564387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel 
19574387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel if (!HasNontrivialInsts) { 19584387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel DEBUG(if (DebugPairSelection) dbgs() << 19594387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel "\tNo non-trivial instructions in tree;" 19604387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel " override to zero effective size\n"); 19614387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel EffSize = 0; 19624387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel } 196365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } else { 196465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), 196565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel E = PrunedTree.end(); S != E; ++S) 196665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel EffSize += (int) getDepthFactor(S->first); 196765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 1968de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1969de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugPairSelection) 1970de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: found pruned Tree for pair {" 1971de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *J->first << " <-> " << *J->second << "} of depth " << 1972de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxDepth << " and size " << PrunedTree.size() << 1973de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " (effective size: " << EffSize << ")\n"); 197478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (((VTTI && !UseChainDepthWithTI) || 197578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel MaxDepth >= Config.ReqChainDepth) && 197665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel EffSize > 0 && EffSize > BestEffSize) { 1977de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestMaxDepth = MaxDepth; 1978de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestEffSize = EffSize; 1979de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestTree = PrunedTree; 
1980de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1981de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1982de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1983de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1984de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Given the list of candidate pairs, this function selects those 1985de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that will be fused into vector instructions. 1986de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::choosePairs( 1987de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 198865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 1989de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 199086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 199186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 1992de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 199386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, 1994de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1995de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>& ChosenPairs) { 1996bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng bool UseCycleCheck = 1997bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck; 1998de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> PairableInstUserMap; 1999de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator I = PairableInsts.begin(), 2000de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = PairableInsts.end(); I != E; ++I) { 
2001de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The number of possible pairings for this variable: 2002de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t NumChoices = CandidatePairs.count(*I); 2003de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!NumChoices) continue; 2004de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2005de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I); 2006de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2007de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The best pair to choose and its tree: 200865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel size_t BestMaxDepth = 0; 200965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel int BestEffSize = 0; 2010de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> BestTree; 201165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel findBestTreeFor(CandidatePairs, CandidatePairCostSavings, 201286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel PairableInsts, FixedOrderPairs, PairConnectionTypes, 201386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ConnectedPairs, ConnectedPairDeps, 2014de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers, PairableInstUserMap, ChosenPairs, 2015de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestTree, BestMaxDepth, BestEffSize, ChoiceRange, 2016de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck); 2017de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2018de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A tree has been chosen (or not) at this point. If no tree was 2019de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // chosen, then this instruction, I, cannot be paired (and is no longer 2020de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // considered). 
2021de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2022de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (BestTree.size() > 0) 2023de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: selected pairs in the best tree for: " 2024de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *cast<Instruction>(*I) << "\n"); 2025de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2026de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<ValuePair>::iterator S = BestTree.begin(), 2027de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE2 = BestTree.end(); S != SE2; ++S) { 2028de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Insert the members of this tree into the list of chosen pairs. 2029de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.insert(ValuePair(S->first, S->second)); 2030de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " << 2031de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel *S->second << "\n"); 2032de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2033de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Remove all candidate pairs that have values in the chosen tree. 2034de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator K = 2035de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.begin(); K != CandidatePairs.end();) { 2036de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (K->first == S->first || K->second == S->first || 2037de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->second == S->second || K->first == S->second) { 2038de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Don't remove the actual pair chosen so that it can be used 2039de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // in subsequent tree selections. 
2040de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!(K->first == S->first && K->second == S->second)) 2041de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.erase(K++); 2042de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 2043de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++K; 2044de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2045de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++K; 2046de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2047de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2048de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2049de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2050de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2051de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n"); 2052de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2053de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2054de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::string getReplacementName(Instruction *I, bool IsInput, unsigned o, 2055de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned n = 0) { 2056de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!I->hasName()) 2057de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return ""; 2058de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2059de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) + 2060de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (n > 0 ? "." + utostr(n) : "")).str(); 2061de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2062de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2063de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value that is to be used as the pointer input to the vector 2064de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instruction that fuses I with J. 
2065de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, 2066202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *I, Instruction *J, unsigned o) { 2067de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *IPtr, *JPtr; 206865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; 2069de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t OffsetInElmts; 2070282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2071202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel // Note: the analysis might fail here, that is why the pair order has 2072282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel // been precomputed (OffsetInElmts must be unused here). 2073de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, 207465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace, JAddressSpace, 207593f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel OffsetInElmts, false); 2076de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2077de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The pointer value is taken to be the one with the lowest offset. 
2078202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Value *VPtr = IPtr; 2079de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 208064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType(); 208164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType(); 208264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 2083de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VArgPtrType = PointerType::get(VArgType, 2084de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cast<PointerType>(IPtr->getType())->getAddressSpace()); 2085de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o), 2086202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel /* insert before */ I); 2087de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2088de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2089de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J, 209064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaskOffset, unsigned NumInElem, 209164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElem1, unsigned IdxOffset, 209264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> &Mask) { 209364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements(); 209464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < NumElem1; ++v) { 2095de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int m = cast<ShuffleVectorInst>(J)->getMaskValue(v); 2096de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (m < 0) { 2097de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context)); 
2098de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2099de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned mm = m + (int) IdxOffset; 210064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (m >= (int) NumInElem1) 2101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel mm += (int) NumInElem; 2102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[v+MaskOffset] = 2104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantInt::get(Type::getInt32Ty(Context), mm); 2105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value that is to be used as the vector-shuffle mask to the 2110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vector instruction that fuses I with J. 2111de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context, 2112de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 2113de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This is the shuffle mask. We need to append the second 2114de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // mask to the first, and the numbers need to be adjusted. 
2115de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 211664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getType(); 211764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getType(); 211864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 211964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 212064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements(); 2121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Get the total number of elements in the fused vector type. 2123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // By definition, this must equal the number of elements in 2124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the final mask. 2125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumElem = cast<VectorType>(VArgType)->getNumElements(); 2126de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Constant*> Mask(NumElem); 2127de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 212864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *OpTypeI = I->getOperand(0)->getType(); 212964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements(); 213064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *OpTypeJ = J->getOperand(0)->getType(); 213164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements(); 213264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 213364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // The fused vector will be: 213464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // ----------------------------------------------------- 213564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ | 213664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal 
Finkel // ----------------------------------------------------- 213764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // from which we'll extract NumElem total elements (where the first NumElemI 213864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // of them come from the mask in I and the remainder come from the mask 213964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // in J. 2140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For the mask from the first pair... 214264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI, 214364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 0, Mask); 2144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For the mask from the second pair... 214664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ, 214764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NumInElemI, Mask); 2148de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2149de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return ConstantVector::get(Mask); 2150de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 215264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I, 215364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *J, unsigned o, Value *&LOp, 215464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemL, 215564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeL, Type *ArgTypeH, 215672465ea23d010507d3746adc126d719005981e05Hal Finkel bool IBeforeJ, unsigned IdxOff) { 215764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool ExpandedIEChain = false; 215864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) { 
215964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // If we have a pure insertelement chain, then this can be rewritten 216064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // into a chain that directly builds the larger type. 2161b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel if (isPureIEChain(LIE)) { 216264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel SmallVector<Value *, 8> VectElemts(numElemL, 216364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel UndefValue::get(ArgTypeL->getScalarType())); 216464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel InsertElementInst *LIENext = LIE; 216564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel do { 216664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned Idx = 216764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(LIENext->getOperand(2))->getSExtValue(); 216864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectElemts[Idx] = LIENext->getOperand(1); 216964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } while ((LIENext = 217064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel dyn_cast<InsertElementInst>(LIENext->getOperand(0)))); 217164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 217264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIENext = 0; 217364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *LIEPrev = UndefValue::get(ArgTypeH); 217464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemL; ++i) { 217564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (isa<UndefValue>(VectElemts[i])) continue; 217664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i], 217764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantInt::get(Type::getInt32Ty(Context), 217864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel i + IdxOff), 217972465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? 
I : J, 218072465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, i+1)); 218172465ea23d010507d3746adc126d719005981e05Hal Finkel LIENext->insertBefore(IBeforeJ ? J : I); 218264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIEPrev = LIENext; 218364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 218464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 218564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH); 218664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExpandedIEChain = true; 218764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 218864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 218964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 219064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return ExpandedIEChain; 219164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 219264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 2193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value to be used as the specified operand of the vector 2194de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instruction that fuses I with J. 
2195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I, 219672465ea23d010507d3746adc126d719005981e05Hal Finkel Instruction *J, unsigned o, bool IBeforeJ) { 2197de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); 2198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); 2199de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 220064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // Compute the fused vector type for this operand 220164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getOperand(o)->getType(); 220264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getOperand(o)->getType(); 220364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 2204de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *L = I, *H = J; 220664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ; 2207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 220864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemL; 220964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgTypeL->isVectorTy()) 221064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL = cast<VectorType>(ArgTypeL)->getNumElements(); 221164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 221264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL = 1; 2213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 221464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemH; 221564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgTypeH->isVectorTy()) 221664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemH = cast<VectorType>(ArgTypeH)->getNumElements(); 
221764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 221864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemH = 1; 221964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 222064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *LOp = L->getOperand(o); 222164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *HOp = H->getOperand(o); 222264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = VArgType->getNumElements(); 222364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 222464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // First, we check if we can reuse the "original" vector outputs (if these 222564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // exist). We might need a shuffle. 222664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExtractElementInst *LEE = dyn_cast<ExtractElementInst>(LOp); 222764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExtractElementInst *HEE = dyn_cast<ExtractElementInst>(HOp); 222864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ShuffleVectorInst *LSV = dyn_cast<ShuffleVectorInst>(LOp); 222964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ShuffleVectorInst *HSV = dyn_cast<ShuffleVectorInst>(HOp); 223064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 223164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // FIXME: If we're fusing shuffle instructions, then we can't apply this 223264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // optimization. The input vectors to the shuffle might be a different 223364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // length from the shuffle outputs. Unfortunately, the replacement 223464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // shuffle mask has already been formed, and the mask entries are sensitive 223564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // to the sizes of the inputs. 
223664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool IsSizeChangeShuffle = 223764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel isa<ShuffleVectorInst>(L) && 223864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel (LOp->getType() != L->getType() || HOp->getType() != H->getType()); 223964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 224064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) { 224164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We can have at most two unique vector inputs. 224264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool CanUseInputs = true; 224364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I1, *I2 = 0; 224464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (LEE) { 224564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = LEE->getOperand(0); 224664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 224764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = LSV->getOperand(0); 224864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = LSV->getOperand(1); 224964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I2 == I1 || isa<UndefValue>(I2)) 225064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = 0; 225164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 225264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 225364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (HEE) { 225464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I3 = HEE->getOperand(0); 225564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I3 != I1) 225664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I3; 225764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I3 != I1 && I3 != I2) 225864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 225964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 226064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I3 = HSV->getOperand(0); 
226164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I3 != I1) 226264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I3; 226364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I3 != I1 && I3 != I2) 226464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 226564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 226664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (CanUseInputs) { 226764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I4 = HSV->getOperand(1); 226864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!isa<UndefValue>(I4)) { 226964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I4 != I1) 227064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I4; 227164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I4 != I1 && I4 != I2) 227264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 227364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 227464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 227564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 227664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 227764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (CanUseInputs) { 227864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned LOpElem = 227964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType()) 228064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ->getNumElements(); 228164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned HOpElem = 228264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType()) 228364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ->getNumElements(); 228464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 228564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We have one or two input vectors. 
We need to map each index of the 228664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // operands to the index of the original vector. 228764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel SmallVector<std::pair<int, int>, 8> II(numElem); 228864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemL; ++i) { 228964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx, INum; 229064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (LEE) { 229164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = 229264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(LEE->getOperand(1))->getSExtValue(); 229364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LEE->getOperand(0) == I1 ? 0 : 1; 229464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 229564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = LSV->getMaskValue(i); 229664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx < (int) LOpElem) { 229764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LSV->getOperand(0) == I1 ? 0 : 1; 229864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 229964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx -= LOpElem; 230064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LSV->getOperand(1) == I1 ? 
0 : 1; 230164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 230264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 230364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 230464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel II[i] = std::pair<int, int>(Idx, INum); 230564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 230664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemH; ++i) { 230764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx, INum; 230864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (HEE) { 230964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = 231064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(HEE->getOperand(1))->getSExtValue(); 231164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HEE->getOperand(0) == I1 ? 0 : 1; 231264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 231364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = HSV->getMaskValue(i); 231464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx < (int) HOpElem) { 231564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HSV->getOperand(0) == I1 ? 0 : 1; 231664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 231764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx -= HOpElem; 231864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HSV->getOperand(1) == I1 ? 0 : 1; 231964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 232064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 232164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 232264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel II[i + numElemL] = std::pair<int, int>(Idx, INum); 232364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 232464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 232564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We now have an array which tells us from which index of which 232664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // input vector each element of the operand comes. 
232764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *I1T = cast<VectorType>(I1->getType()); 232864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned I1Elem = I1T->getNumElements(); 232964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 233064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2) { 233164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // In this case there is only one underlying vector input. Check for 233264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // the trivial case where we can use the input directly. 233364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I1Elem == numElem) { 233464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool ElemInOrder = true; 233564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElem; ++i) { 233664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (II[i].first != (int) i && II[i].first != -1) { 233764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ElemInOrder = false; 233864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel break; 233964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 234064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 234164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 234264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ElemInOrder) 234364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return I1; 234464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 234564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 234664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // A shuffle is needed. 
234764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElem); 234864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElem; ++i) { 234964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx = II[i].first; 235064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx == -1) 235164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[i] = UndefValue::get(Type::getInt32Ty(Context)); 235264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 235364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 235464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 235564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 235664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = 235764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, UndefValue::get(I1T), 235864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 235972465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 236072465ea23d010507d3746adc126d719005981e05Hal Finkel true, o)); 236172465ea23d010507d3746adc126d719005981e05Hal Finkel S->insertBefore(IBeforeJ ? J : I); 236264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 236364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 236464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 236564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *I2T = cast<VectorType>(I2->getType()); 236664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned I2Elem = I2T->getNumElements(); 236764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 236864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // This input comes from two distinct vectors. The first step is to 236964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // make sure that both vectors are the same length. 
If not, the 237064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // smaller one will need to grow before they can be shuffled together. 237164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I1Elem < I2Elem) { 237264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(I2Elem); 237364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 237464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I1Elem; ++v) 237564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 237664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I2Elem; ++v) 237764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 237864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 237964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewI1 = 238064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, UndefValue::get(I1T), 238164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 238272465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 238372465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 238472465ea23d010507d3746adc126d719005981e05Hal Finkel NewI1->insertBefore(IBeforeJ ? 
J : I); 238564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = NewI1; 238664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1T = I2T; 238764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1Elem = I2Elem; 238864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (I1Elem > I2Elem) { 238964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(I1Elem); 239064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 239164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I2Elem; ++v) 239264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 239364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I1Elem; ++v) 239464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 239564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 239664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewI2 = 239764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I2, UndefValue::get(I2T), 239864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 239972465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 240072465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 240172465ea23d010507d3746adc126d719005981e05Hal Finkel NewI2->insertBefore(IBeforeJ ? J : I); 240264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = NewI2; 240364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2T = I1T; 240464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2Elem = I1Elem; 240564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 240664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 240764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // Now that both I1 and I2 are the same length we can shuffle them 240864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // together (and use the result). 
240964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElem); 241064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElem; ++v) { 241164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (II[v].first == -1) { 241264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 241364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 241464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx = II[v].first + II[v].second * I1Elem; 241564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 241664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 241764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 241864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 241964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewOp = 242064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask), 242172465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, true, o)); 242272465ea23d010507d3746adc126d719005981e05Hal Finkel NewOp->insertBefore(IBeforeJ ? 
J : I); 242364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return NewOp; 242464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2425de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2426de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 242764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgType = ArgTypeL; 242864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL < numElemH) { 242964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH, 243072465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeL, VArgType, IBeforeJ, 1)) { 243164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // This is another short-circuit case: we're combining a scalar into 243264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // a vector that is formed by an IE chain. We've just expanded the IE 243364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // chain, now insert the scalar and we're done. 243464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 243564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = InsertElementInst::Create(HOp, LOp, CV0, 243672465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, true, o)); 243772465ea23d010507d3746adc126d719005981e05Hal Finkel S->insertBefore(IBeforeJ ? J : I); 243864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 243964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL, 244072465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeH, IBeforeJ)) { 244164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // The two vector inputs to the shuffle must be the same length, 244264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // so extend the smaller vector to be the same length as the larger one. 
244364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NLOp; 244464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL > 1) { 244564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 244664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElemH); 244764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 244864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemL; ++v) 244964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 245064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemH; ++v) 245164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 245264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 245364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL), 245464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 245572465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 245672465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 245764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 245864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0, 245972465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 246072465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 246164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 246264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 246372465ea23d010507d3746adc126d719005981e05Hal Finkel NLOp->insertBefore(IBeforeJ ? 
J : I); 246464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LOp = NLOp; 246564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 246664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 246764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ArgType = ArgTypeH; 246864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (numElemL > numElemH) { 246964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL, 247072465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeH, VArgType, IBeforeJ)) { 247164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = 247264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel InsertElementInst::Create(LOp, HOp, 247364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantInt::get(Type::getInt32Ty(Context), 247464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL), 247572465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 247672465ea23d010507d3746adc126d719005981e05Hal Finkel true, o)); 247772465ea23d010507d3746adc126d719005981e05Hal Finkel S->insertBefore(IBeforeJ ? 
J : I); 247864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 247964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH, 248072465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeL, IBeforeJ)) { 248164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NHOp; 248264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemH > 1) { 248364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElemL); 248464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 248564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemH; ++v) 248664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 248764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemL; ++v) 248864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 248964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 249064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH), 249164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 249272465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 249372465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 249464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 249564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0, 249672465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 249772465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 249864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 249964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 250072465ea23d010507d3746adc126d719005981e05Hal Finkel NHOp->insertBefore(IBeforeJ ? 
J : I); 250164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel HOp = NHOp; 2502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 250364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2504de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 250564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgType->isVectorTy()) { 250664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = cast<VectorType>(VArgType)->getNumElements(); 250764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask(numElem); 250864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElem; ++v) { 250964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned Idx = v; 251064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // If the low vector was expanded, we need to skip the extra 251164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // undefined entries. 251264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (v >= numElemL && numElemH > numElemL) 251364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx += (numElemH - numElemL); 251464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 251564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2516de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 251764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *BV = new ShuffleVectorInst(LOp, HOp, 251872465ea23d010507d3746adc126d719005981e05Hal Finkel ConstantVector::get(Mask), 251972465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, true, o)); 252072465ea23d010507d3746adc126d719005981e05Hal Finkel BV->insertBefore(IBeforeJ ? 
J : I); 2521de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV; 2522de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2523de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2524de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *BV1 = InsertElementInst::Create( 252564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel UndefValue::get(VArgType), LOp, CV0, 252672465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 252772465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 252872465ea23d010507d3746adc126d719005981e05Hal Finkel BV1->insertBefore(IBeforeJ ? J : I); 252964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1, 253072465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 253172465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 2)); 253272465ea23d010507d3746adc126d719005981e05Hal Finkel BV2->insertBefore(IBeforeJ ? J : I); 2533de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV2; 2534de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2535de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2536de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function creates an array of values that will be used as the inputs 2537de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to the vector instruction that fuses I with J. 
2538de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, 2539de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J, 254072465ea23d010507d3746adc126d719005981e05Hal Finkel SmallVector<Value *, 3> &ReplacedOperands, 254172465ea23d010507d3746adc126d719005981e05Hal Finkel bool IBeforeJ) { 2542de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumOperands = I->getNumOperands(); 2543de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2544de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { 2545de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate backward so that we look at the store pointer 2546de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // first and know whether or not we need to flip the inputs. 2547de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2548de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) { 2549de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This is the pointer for a load/store instruction. 
2550202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o); 2551de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 25526173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } else if (isa<CallInst>(I)) { 2553de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Function *F = cast<CallInst>(I)->getCalledFunction(); 2554de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned IID = F->getIntrinsicID(); 25556173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel if (o == NumOperands-1) { 25566173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel BasicBlock &BB = *I->getParent(); 2557bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 25586173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Module *M = BB.getParent()->getParent(); 255964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getType(); 256064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getType(); 256164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 2562bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 25636173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel ReplacedOperands[o] = Intrinsic::getDeclaration(M, 25646173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel (Intrinsic::ID) IID, VArgType); 25656173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel continue; 25666173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } else if (IID == Intrinsic::powi && o == 1) { 25676173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // The second argument of powi is a single integer and we've already 25686173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // checked that both arguments are equal. As a result, we just keep 25696173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // I's second argument. 
25706173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel ReplacedOperands[o] = I->getOperand(o); 25716173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel continue; 25726173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } 2573de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) { 2574de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J); 2575de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2576de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2577de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 257872465ea23d010507d3746adc126d719005981e05Hal Finkel ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ); 2579de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2580de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2581de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2582de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function creates two values that represent the outputs of the 2583de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // original I and J instructions. These are generally vector shuffles 2584de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // or extracts. In many cases, these will end up being unused and, thus, 2585de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // eliminated by later passes. 
2586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I, 2587de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, Instruction *K, 2588de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 2589202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *&K1, Instruction *&K2) { 2590de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<StoreInst>(I)) { 2591de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(I, K); 2592de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(J, K); 2593de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2594de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *IType = I->getType(); 259564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *JType = J->getType(); 259664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 259764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *VType = getVecTypeForPair(IType, JType); 259864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = VType->getNumElements(); 259964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 260064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemI, numElemJ; 260164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (IType->isVectorTy()) 260264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemI = cast<VectorType>(IType)->getNumElements(); 260364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 260464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemI = 1; 260564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 260664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (JType->isVectorTy()) 260764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemJ = cast<VectorType>(JType)->getNumElements(); 260864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 260964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemJ = 1; 
2610de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2611de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IType->isVectorTy()) { 261264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask1(numElemI), Mask2(numElemI); 261364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElemI; ++v) { 261464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 261564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v); 261664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 261864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel K1 = new ShuffleVectorInst(K, UndefValue::get(VType), 2619202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel ConstantVector::get( Mask1), 262064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(K, false, 1)); 2621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 262264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); 2623202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel K1 = ExtractElementInst::Create(K, CV0, 2624de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 1)); 262564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 262664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 262764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (JType->isVectorTy()) { 262864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ); 262964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElemJ; ++v) { 263064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 263164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v); 
263264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 263364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 263464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel K2 = new ShuffleVectorInst(K, UndefValue::get(VType), 2635202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel ConstantVector::get( Mask2), 263664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(K, false, 2)); 263764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 263864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); 2639202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel K2 = ExtractElementInst::Create(K, CV1, 2640de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 2)); 2641de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2642de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2643de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K1->insertAfter(K); 2644de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K2->insertAfter(K1); 2645de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InsertionPt = K2; 2646de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2647de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2648de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2649de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move all uses of the function I (including pairing-induced uses) after J. 2650de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB, 2651de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 2652de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 2653de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 
2654ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (; cast<Instruction>(L) != J; ++L) 2659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet); 2660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2661de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel assert(cast<Instruction>(L) == J && 2662de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel "Tracking has not proceeded far enough to check for dependencies"); 2663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If J is now in the use set of I, then trackUsesOfI will return true 2664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // and we have a dependency cycle (and the fusing operation must abort). 2665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSet); 2666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2667de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move all uses of the function I (including pairing-induced uses) after J. 2669de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB, 2670de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 2671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 2672de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 2673de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 
2674ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2675de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2676de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2677de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2678de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (; cast<Instruction>(L) != J;) { 2679de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet)) { 2680de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move this instruction 2681de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *InstToMove = L; ++L; 2682de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2683de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: moving: " << *InstToMove << 2684de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " to after " << *InsertionPt << "\n"); 2685de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InstToMove->removeFromParent(); 2686de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InstToMove->insertAfter(InsertionPt); 2687de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InsertionPt = InstToMove; 2688de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2689de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++L; 2690de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2691de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2692de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2693de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2694de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Collect all load instruction that are in the move set of a given first 2695de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair member. These loads depend on the first instruction, I, and so need 2696de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to be moved after J (the second instruction) when the pair is fused. 
2697de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB, 2698de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 2699de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 2700de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I) { 2701de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 2702ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2703de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2704de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2705de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2706de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2707de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: We cannot end the loop when we reach J because J could be moved 2708de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // farther down the use chain by another instruction pairing. Also, J 2709de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // could be before I if this is an inverted input. 
2710de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) { 2711de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (trackUsesOfI(Users, WriteSet, I, L)) { 2712de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (L->mayReadFromMemory()) 2713de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LoadMoveSet.insert(ValuePair(L, I)); 2714de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2715de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2716de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2717de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2718de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // In cases where both load/stores and the computation of their pointers 2719de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // are chosen for vectorization, we can end up in a situation where the 2720de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // aliasing analysis starts returning different query results as the 2721de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // process of fusing instruction pairs continues. Because the algorithm 2722de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // relies on finding the same use trees here as were found earlier, we'll 2723de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // need to precompute the necessary aliasing information here and then 2724de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // manually update it during the fusion process. 
2725de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::collectLoadMoveSet(BasicBlock &BB, 2726de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 2727de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 2728de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet) { 2729de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator PI = PairableInsts.begin(), 2730de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PIE = PairableInsts.end(); PI != PIE; ++PI) { 2731de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI); 2732de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (P == ChosenPairs.end()) continue; 2733de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2734de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I = cast<Instruction>(P->first); 2735de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, I); 2736de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2737de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2738de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2739ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // When the first instruction in each pair is cloned, it will inherit its 2740ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // parent's metadata. This metadata must be combined with that of the other 2741ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // instruction in a safe way. 
2742ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel void BBVectorize::combineMetadata(Instruction *K, const Instruction *J) { 2743ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata; 2744ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->getAllMetadataOtherThanDebugLoc(Metadata); 2745ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel for (unsigned i = 0, n = Metadata.size(); i < n; ++i) { 2746ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel unsigned Kind = Metadata[i].first; 2747ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel MDNode *JMD = J->getMetadata(Kind); 2748ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel MDNode *KMD = Metadata[i].second; 2749ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 2750ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel switch (Kind) { 2751ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel default: 2752ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, 0); // Remove unknown metadata 2753ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2754ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel case LLVMContext::MD_tbaa: 2755ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); 2756ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2757ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel case LLVMContext::MD_fpmath: 2758ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD)); 2759ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2760ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2761ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2762ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2763ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 2764de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function fuses the chosen instruction pairs into vector instructions, 
2765de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // taking care preserve any needed scalar outputs and, then, it reorders the 2766de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // remaining instructions as needed (users of the first member of the pair 2767de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // need to be moved to after the location of the second member of the pair 2768de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // because the vector instruction is inserted in the location of the pair's 2769de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // second member). 2770de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::fuseChosenPairs(BasicBlock &BB, 2771de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 2772a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 277372465ea23d010507d3746adc126d719005981e05Hal Finkel DenseSet<ValuePair> &FixedOrderPairs, 277472465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 277572465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 277672465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairDeps) { 2777de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LLVMContext& Context = BB.getContext(); 2778de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2779de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // During the vectorization process, the order of the pairs to be fused 2780de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // could be flipped. So we'll add each pair, flipped, into the ChosenPairs 2781de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // list. After a pair is fused, the flipped pair is removed from the list. 
278272465ea23d010507d3746adc126d719005981e05Hal Finkel DenseSet<ValuePair> FlippedPairs; 2783de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(), 2784de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = ChosenPairs.end(); P != E; ++P) 278572465ea23d010507d3746adc126d719005981e05Hal Finkel FlippedPairs.insert(ValuePair(P->second, P->first)); 278672465ea23d010507d3746adc126d719005981e05Hal Finkel for (DenseSet<ValuePair>::iterator P = FlippedPairs.begin(), 2787de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = FlippedPairs.end(); P != E; ++P) 2788de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.insert(*P); 2789de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2790de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> LoadMoveSet; 2791de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); 2792de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2793de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); 2794de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2795de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { 2796de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(PI); 2797de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (P == ChosenPairs.end()) { 2798de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2799de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2800de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2801de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2802de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (getDepthFactor(P->first) == 0) { 2803de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // These instructions are not really fused, but are tracked as though 
2804de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // they are. Any case in which it would be interesting to fuse them 2805de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // will be taken care of by InstCombine. 2806de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel --NumFusedOps; 2807de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2808de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2809de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2810de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2811de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I = cast<Instruction>(P->first), 2812de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel *J = cast<Instruction>(P->second); 2813de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2814de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: fusing: " << *I << 2815de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << "\n"); 2816de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2817de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Remove the pair and flipped pair from the list. 
2818de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator FP = ChosenPairs.find(P->second); 2819de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel assert(FP != ChosenPairs.end() && "Flipped pair not found in list"); 2820de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.erase(FP); 2821de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.erase(P); 2822de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2823de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!canMoveUsesOfIAfterJ(BB, LoadMoveSet, I, J)) { 2824de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: fusion of: " << *I << 2825de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << 2826de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " aborted because of non-trivial dependency cycle\n"); 2827de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel --NumFusedOps; 2828de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2829de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2830de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2831de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2832a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel // If the pair must have the other order, then flip it. 2833a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I)); 283472465ea23d010507d3746adc126d719005981e05Hal Finkel if (!FlipPairOrder && !FixedOrderPairs.count(ValuePair(I, J))) { 283572465ea23d010507d3746adc126d719005981e05Hal Finkel // This pair does not have a fixed order, and so we might want to 283672465ea23d010507d3746adc126d719005981e05Hal Finkel // flip it if that will yield fewer shuffles. We count the number 283772465ea23d010507d3746adc126d719005981e05Hal Finkel // of dependencies connected via swaps, and those directly connected, 283872465ea23d010507d3746adc126d719005981e05Hal Finkel // and flip the order if the number of swaps is greater. 
283972465ea23d010507d3746adc126d719005981e05Hal Finkel bool OrigOrder = true; 284072465ea23d010507d3746adc126d719005981e05Hal Finkel VPPIteratorPair IP = ConnectedPairDeps.equal_range(ValuePair(I, J)); 284172465ea23d010507d3746adc126d719005981e05Hal Finkel if (IP.first == ConnectedPairDeps.end()) { 284272465ea23d010507d3746adc126d719005981e05Hal Finkel IP = ConnectedPairDeps.equal_range(ValuePair(J, I)); 284372465ea23d010507d3746adc126d719005981e05Hal Finkel OrigOrder = false; 284472465ea23d010507d3746adc126d719005981e05Hal Finkel } 284572465ea23d010507d3746adc126d719005981e05Hal Finkel 284672465ea23d010507d3746adc126d719005981e05Hal Finkel if (IP.first != ConnectedPairDeps.end()) { 284772465ea23d010507d3746adc126d719005981e05Hal Finkel unsigned NumDepsDirect = 0, NumDepsSwap = 0; 284872465ea23d010507d3746adc126d719005981e05Hal Finkel for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; 284972465ea23d010507d3746adc126d719005981e05Hal Finkel Q != IP.second; ++Q) { 285072465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned>::iterator R = 285172465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.find(VPPair(Q->second, Q->first)); 285272465ea23d010507d3746adc126d719005981e05Hal Finkel assert(R != PairConnectionTypes.end() && 285372465ea23d010507d3746adc126d719005981e05Hal Finkel "Cannot find pair connection type"); 285472465ea23d010507d3746adc126d719005981e05Hal Finkel if (R->second == PairConnectionDirect) 285572465ea23d010507d3746adc126d719005981e05Hal Finkel ++NumDepsDirect; 285672465ea23d010507d3746adc126d719005981e05Hal Finkel else if (R->second == PairConnectionSwap) 285772465ea23d010507d3746adc126d719005981e05Hal Finkel ++NumDepsSwap; 285872465ea23d010507d3746adc126d719005981e05Hal Finkel } 285972465ea23d010507d3746adc126d719005981e05Hal Finkel 286072465ea23d010507d3746adc126d719005981e05Hal Finkel if (!OrigOrder) 286172465ea23d010507d3746adc126d719005981e05Hal Finkel std::swap(NumDepsDirect, NumDepsSwap); 
286272465ea23d010507d3746adc126d719005981e05Hal Finkel 286372465ea23d010507d3746adc126d719005981e05Hal Finkel if (NumDepsSwap > NumDepsDirect) { 286472465ea23d010507d3746adc126d719005981e05Hal Finkel FlipPairOrder = true; 286572465ea23d010507d3746adc126d719005981e05Hal Finkel DEBUG(dbgs() << "BBV: reordering pair: " << *I << 286672465ea23d010507d3746adc126d719005981e05Hal Finkel " <-> " << *J << "\n"); 286772465ea23d010507d3746adc126d719005981e05Hal Finkel } 286872465ea23d010507d3746adc126d719005981e05Hal Finkel } 286972465ea23d010507d3746adc126d719005981e05Hal Finkel } 2870282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2871202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *L = I, *H = J; 2872a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel if (FlipPairOrder) 2873202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel std::swap(H, L); 2874202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel 287572465ea23d010507d3746adc126d719005981e05Hal Finkel // If the pair being fused uses the opposite order from that in the pair 287672465ea23d010507d3746adc126d719005981e05Hal Finkel // connection map, then we need to flip the types. 
287772465ea23d010507d3746adc126d719005981e05Hal Finkel VPPIteratorPair IP = ConnectedPairs.equal_range(ValuePair(H, L)); 287872465ea23d010507d3746adc126d719005981e05Hal Finkel for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; 287972465ea23d010507d3746adc126d719005981e05Hal Finkel Q != IP.second; ++Q) { 288072465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(*Q); 288172465ea23d010507d3746adc126d719005981e05Hal Finkel assert(R != PairConnectionTypes.end() && 288272465ea23d010507d3746adc126d719005981e05Hal Finkel "Cannot find pair connection type"); 288372465ea23d010507d3746adc126d719005981e05Hal Finkel if (R->second == PairConnectionDirect) 288472465ea23d010507d3746adc126d719005981e05Hal Finkel R->second = PairConnectionSwap; 288572465ea23d010507d3746adc126d719005981e05Hal Finkel else if (R->second == PairConnectionSwap) 288672465ea23d010507d3746adc126d719005981e05Hal Finkel R->second = PairConnectionDirect; 288772465ea23d010507d3746adc126d719005981e05Hal Finkel } 288872465ea23d010507d3746adc126d719005981e05Hal Finkel 288972465ea23d010507d3746adc126d719005981e05Hal Finkel bool LBeforeH = !FlipPairOrder; 2890de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumOperands = I->getNumOperands(); 2891de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SmallVector<Value *, 3> ReplacedOperands(NumOperands); 289272465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementInputsForPair(Context, L, H, ReplacedOperands, 289372465ea23d010507d3746adc126d719005981e05Hal Finkel LBeforeH); 2894de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2895de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Make a copy of the original operation, change its type to the vector 2896de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // type and replace its operands with the vector operands. 
289772465ea23d010507d3746adc126d719005981e05Hal Finkel Instruction *K = L->clone(); 289872465ea23d010507d3746adc126d719005981e05Hal Finkel if (L->hasName()) 289972465ea23d010507d3746adc126d719005981e05Hal Finkel K->takeName(L); 290072465ea23d010507d3746adc126d719005981e05Hal Finkel else if (H->hasName()) 290172465ea23d010507d3746adc126d719005981e05Hal Finkel K->takeName(H); 2902de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2903de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isa<StoreInst>(K)) 2904202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel K->mutateType(getVecTypeForPair(L->getType(), H->getType())); 2905de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 290672465ea23d010507d3746adc126d719005981e05Hal Finkel combineMetadata(K, H); 2907ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 2908de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned o = 0; o < NumOperands; ++o) 2909de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->setOperand(o, ReplacedOperands[o]); 2910de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2911de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->insertAfter(J); 2912de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2913de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Instruction insertion point: 2914de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *InsertionPt = K; 2915de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *K1 = 0, *K2 = 0; 2916202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2); 2917de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2918de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The use tree of the first original instruction must be moved to after 2919de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the location of the second instruction. 
The entire use tree of the 2920de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // first instruction is disjoint from the input tree of the second 2921de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // (by definition), and so commutes with it. 2922de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2923de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J); 2924de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2925de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isa<StoreInst>(I)) { 292672465ea23d010507d3746adc126d719005981e05Hal Finkel L->replaceAllUsesWith(K1); 292772465ea23d010507d3746adc126d719005981e05Hal Finkel H->replaceAllUsesWith(K2); 292872465ea23d010507d3746adc126d719005981e05Hal Finkel AA->replaceWithNewValue(L, K1); 292972465ea23d010507d3746adc126d719005981e05Hal Finkel AA->replaceWithNewValue(H, K2); 2930de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2931de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2932de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Instructions that may read from memory may be in the load move set. 2933de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Once an instruction is fused, we no longer need its move set, and so 2934de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the values of the map never need to be updated. However, when a load 2935de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is fused, we need to merge the entries from both instructions in the 2936de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair in case those instructions were in the move set of some other 2937de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // yet-to-be-fused pair. The loads in question are the keys of the map. 
2938de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I->mayReadFromMemory()) { 2939de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<ValuePair> NewSetMembers; 2940de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair IPairRange = LoadMoveSet.equal_range(I); 2941de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair JPairRange = LoadMoveSet.equal_range(J); 2942de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator N = IPairRange.first; 2943de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel N != IPairRange.second; ++N) 2944de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel NewSetMembers.push_back(ValuePair(K, N->second)); 2945de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator N = JPairRange.first; 2946de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel N != JPairRange.second; ++N) 2947de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel NewSetMembers.push_back(ValuePair(K, N->second)); 2948de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(), 2949de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AE = NewSetMembers.end(); A != AE; ++A) 2950de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LoadMoveSet.insert(*A); 2951de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2952de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2953de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Before removing I, set the iterator to the next instruction. 
2954de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PI = llvm::next(BasicBlock::iterator(I)); 2955de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (cast<Instruction>(PI) == J) 2956de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2957de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2958de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE->forgetValue(I); 2959de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE->forgetValue(J); 2960de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel I->eraseFromParent(); 2961de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel J->eraseFromParent(); 296272465ea23d010507d3746adc126d719005981e05Hal Finkel 296372465ea23d010507d3746adc126d719005981e05Hal Finkel DEBUG(if (PrintAfterEveryPair) dbgs() << "BBV: block is now: \n" << 296472465ea23d010507d3746adc126d719005981e05Hal Finkel BB << "\n"); 2965de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2966de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2967de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: final: \n" << BB << "\n"); 2968de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2969de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel} 2970de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2971de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelchar BBVectorize::ID = 0; 2972de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic const char bb_vectorize_name[] = "Basic-Block Vectorization"; 2973de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) 2974de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 2975e29c19091cca58db668407dfc5dd86c70e8b3d49Hal FinkelINITIALIZE_PASS_DEPENDENCY(DominatorTree) 2976de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 2977de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) 
2978de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2979bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengBasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) { 2980bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng return new BBVectorize(C); 2981de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel} 2982de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2983bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengbool 2984bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengllvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) { 2985bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize BBVectorizer(P, C); 298687825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng return BBVectorizer.vectorizeBB(BB); 298787825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng} 2988bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 2989bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng//===----------------------------------------------------------------------===// 2990bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengVectorizeConfig::VectorizeConfig() { 2991bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng VectorBits = ::VectorBits; 2992768edf3cd037aab10391abc279f71470df8e3156Hal Finkel VectorizeBools = !::NoBools; 299386312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeInts = !::NoInts; 299486312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeFloats = !::NoFloats; 2995f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel VectorizePointers = !::NoPointers; 299686312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeCasts = !::NoCasts; 299786312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeMath = !::NoMath; 299886312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeFMA = !::NoFMA; 2999fc3665c87519850f629c9565535e3be447e10addHal Finkel VectorizeSelect = !::NoSelect; 3000e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel VectorizeCmp = !::NoCmp; 
3001f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel VectorizeGEP = !::NoGEP; 300286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeMemOps = !::NoMemOps; 3003bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng AlignedOnly = ::AlignedOnly; 3004bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng ReqChainDepth= ::ReqChainDepth; 3005bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng SearchLimit = ::SearchLimit; 3006bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck; 3007bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng SplatBreaksChain = ::SplatBreaksChain; 3008bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxInsts = ::MaxInsts; 3009bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxIter = ::MaxIter; 301064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Pow2LenOnly = ::Pow2LenOnly; 3011bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng NoMemOpBoost = ::NoMemOpBoost; 3012bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng FastDep = ::FastDep; 3013bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng} 3014