BBVectorize.cpp revision 8f3359a4b396d3f1a7b2726e02f199be74c62e4c
//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a basic-block vectorization pass. The algorithm was
// inspired by that used by the Vienna MAP Vectorizer by Franchetti and Kral,
// et al. It works by looking for chains of pairable operations and then
// pairing them.
14de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// 15de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//===----------------------------------------------------------------------===// 16de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 17de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define BBV_NAME "bb-vectorize" 18de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define DEBUG_TYPE BBV_NAME 19d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Transforms/Vectorize.h" 20de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseMap.h" 21de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseSet.h" 22d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/ADT/STLExtras.h" 2386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel#include "llvm/ADT/SmallSet.h" 24de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/SmallVector.h" 25de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/Statistic.h" 26de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/StringExtras.h" 27de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasAnalysis.h" 28de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasSetTracker.h" 29e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel#include "llvm/Analysis/Dominators.h" 30de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolution.h" 31de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolutionExpressions.h" 32be04929f7fd76a921540e9901f24563e51dc1219Chandler Carruth#include "llvm/Analysis/TargetTransformInfo.h" 33de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ValueTracking.h" 340b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Constants.h" 350b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DataLayout.h" 
360b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DerivedTypes.h" 370b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Function.h" 380b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Instructions.h" 390b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IntrinsicInst.h" 400b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Intrinsics.h" 410b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/LLVMContext.h" 420b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Metadata.h" 430b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Type.h" 44d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Pass.h" 45de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/CommandLine.h" 46de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/Debug.h" 47de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/ValueHandle.h" 48d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/raw_ostream.h" 4964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel#include "llvm/Transforms/Utils/Local.h" 50de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include <algorithm> 51de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include <map> 52de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelusing namespace llvm; 53de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 5465309660fa61a837cc05323f69c618a7d8134d56Hal Finkelstatic cl::opt<bool> 5565309660fa61a837cc05323f69c618a7d8134d56Hal FinkelIgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false), 5665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel cl::Hidden, cl::desc("Ignore target information")); 5765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 58de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 59de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal 
FinkelReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden, 60de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The required chain depth for vectorization")); 61de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 6278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkelstatic cl::opt<bool> 6378fd353d5e5daedc47ecc31b6193ca48793c249cHal FinkelUseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false), 6478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel cl::Hidden, cl::desc("Use the chain depth requirement with" 6578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel " target information")); 6678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 67de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 68de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden, 69de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The maximum search distance for instruction pairs")); 70de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 71de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 72de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden, 73de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Replicating one element to a pair breaks the chain")); 74de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 75de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 76de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelVectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden, 77de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The size of the native vector registers")); 78de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 79de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 80de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, 
81de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The maximum number of pairing iterations")); 82de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 8364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkelstatic cl::opt<bool> 8464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal FinkelPow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden, 8564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cl::desc("Don't try to form non-2^n-length vectors")); 8664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 87de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 885d4e18bc39fea892f523d960213906d296d3cb38Hal FinkelMaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, 895d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel cl::desc("The maximum number of pairable instructions per group")); 905d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 915d4e18bc39fea892f523d960213906d296d3cb38Hal Finkelstatic cl::opt<unsigned> 92de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), 93de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use" 94de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " a full cycle check")); 95de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 96de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 97768edf3cd037aab10391abc279f71470df8e3156Hal FinkelNoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden, 98768edf3cd037aab10391abc279f71470df8e3156Hal Finkel cl::desc("Don't try to vectorize boolean (i1) values")); 99768edf3cd037aab10391abc279f71470df8e3156Hal Finkel 100768edf3cd037aab10391abc279f71470df8e3156Hal Finkelstatic cl::opt<bool> 101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden, 102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize integer 
values")); 103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden, 106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize floating-point values")); 107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 108822ab00847da841a63be4e3883cb5f442dc69069Hal Finkel// FIXME: This should default to false once pointer vector support works. 109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 110822ab00847da841a63be4e3883cb5f442dc69069Hal FinkelNoPointers("bb-vectorize-no-pointers", cl::init(/*false*/ true), cl::Hidden, 111f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel cl::desc("Don't try to vectorize pointer values")); 112f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 113f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkelstatic cl::opt<bool> 114de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden, 115de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize casting (conversion) operations")); 116de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 118de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden, 119de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize floating-point math intrinsics")); 120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, 123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize the fused-multiply-add intrinsic")); 124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal 
Finkel 125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 126fc3665c87519850f629c9565535e3be447e10addHal FinkelNoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, 127fc3665c87519850f629c9565535e3be447e10addHal Finkel cl::desc("Don't try to vectorize select instructions")); 128fc3665c87519850f629c9565535e3be447e10addHal Finkel 129fc3665c87519850f629c9565535e3be447e10addHal Finkelstatic cl::opt<bool> 130e415f96b6a43ac8861148a11a4258bc38c247e8fHal FinkelNoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden, 131e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel cl::desc("Don't try to vectorize comparison instructions")); 132e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel 133e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkelstatic cl::opt<bool> 134f3f5a1e6f77a842ccb24cc81766437da5197d712Hal FinkelNoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, 135f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel cl::desc("Don't try to vectorize getelementptr instructions")); 136f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 137f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkelstatic cl::opt<bool> 138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, 139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize loads and stores")); 140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelAlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden, 143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Only generate aligned loads and stores")); 144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 146edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal FinkelNoMemOpBoost("bb-vectorize-no-mem-op-boost", 
147edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel cl::init(false), cl::Hidden, 148edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel cl::desc("Don't boost the chain-depth contribution of loads and stores")); 149edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel 150edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkelstatic cl::opt<bool> 151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelFastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden, 152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Use a fast instruction dependency analysis")); 153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#ifndef NDEBUG 155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugInstructionExamination("bb-vectorize-debug-instruction-examination", 157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 159de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " instruction-examination process")); 160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCandidateSelection("bb-vectorize-debug-candidate-selection", 162de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 164de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " candidate-selection process")); 165de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 166de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugPairSelection("bb-vectorize-debug-pair-selection", 167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When 
debugging is enabled, output information on the" 169de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " pair-selection process")); 170de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 171de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCycleCheck("bb-vectorize-debug-cycle-check", 172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 174de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " cycle-checking process")); 17572465ea23d010507d3746adc126d719005981e05Hal Finkel 17672465ea23d010507d3746adc126d719005981e05Hal Finkelstatic cl::opt<bool> 17772465ea23d010507d3746adc126d719005981e05Hal FinkelPrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair", 17872465ea23d010507d3746adc126d719005981e05Hal Finkel cl::init(false), cl::Hidden, 17972465ea23d010507d3746adc126d719005981e05Hal Finkel cl::desc("When debugging is enabled, dump the basic block after" 18072465ea23d010507d3746adc126d719005981e05Hal Finkel " every pair is fused")); 181de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#endif 182de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSTATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize"); 184de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 185de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelnamespace { 186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel struct BBVectorize : public BasicBlockPass { 187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel static char ID; // Pass identification, replacement for typeid 188bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 189940371bc65570ec0add1ede4f4d9f0a41ba25e09Hongbin Zheng const VectorizeConfig Config; 190bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 191bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize(const VectorizeConfig &C = 
VectorizeConfig()) 192bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng : BasicBlockPass(ID), Config(C) { 193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel initializeBBVectorizePass(*PassRegistry::getPassRegistry()); 194de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 196bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize(Pass *P, const VectorizeConfig &C) 197bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng : BasicBlockPass(ID), Config(C) { 19887825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng AA = &P->getAnalysis<AliasAnalysis>(); 199e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DT = &P->getAnalysis<DominatorTree>(); 20087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng SE = &P->getAnalysis<ScalarEvolution>(); 2013574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow TD = P->getAnalysisIfAvailable<DataLayout>(); 2028bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler Carruth TTI = IgnoreTargetInfo ? 
0 : &P->getAnalysis<TargetTransformInfo>(); 20387825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng } 20487825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<Value *, Value *> ValuePair; 20665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel typedef std::pair<ValuePair, int> ValuePairWithCost; 207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<ValuePair, size_t> ValuePairWithDepth; 208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair 20972465ea23d010507d3746adc126d719005981e05Hal Finkel typedef std::pair<VPPair, unsigned> VPPairWithType; 210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<std::multimap<Value *, Value *>::iterator, 211de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *>::iterator> VPIteratorPair; 212de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator, 213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair>::iterator> 214de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair; 215de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 216de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasAnalysis *AA; 217e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DominatorTree *DT; 218de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ScalarEvolution *SE; 2193574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow DataLayout *TD; 220abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth const TargetTransformInfo *TTI; 221de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 222de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: const correct? 
223de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 22464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false); 225de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2265d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool getCandidatePairs(BasicBlock &BB, 2275d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator &Start, 228de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 229a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 23065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 23164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Value *> &PairableInsts, bool NonPow2Len); 232de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 23378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // FIXME: The current implementation does not account for pairs that 23478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // are connected in multiple ways. 
For example: 23578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap) 23672465ea23d010507d3746adc126d719005981e05Hal Finkel enum PairConnectionType { 23772465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionDirect, 23872465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionSwap, 23972465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionSplat 24072465ea23d010507d3746adc126d719005981e05Hal Finkel }; 24172465ea23d010507d3746adc126d719005981e05Hal Finkel 242de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs, 243b1a82589339fed148c12b052d30861a539552f1aHal Finkel DenseSet<ValuePair> &CandidatePairsSet, 244de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 24572465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 24672465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes); 247de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 248de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void buildDepMap(BasicBlock &BB, 249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 250de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers); 252de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void choosePairs(std::multimap<Value *, Value *> &CandidatePairs, 254b1a82589339fed148c12b052d30861a539552f1aHal Finkel DenseSet<ValuePair> &CandidatePairsSet, 25565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 256de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 
25786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 25886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 26086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, 261de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>& ChosenPairs); 263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void fuseChosenPairs(BasicBlock &BB, 265de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 266a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseMap<Value *, Value *>& ChosenPairs, 26772465ea23d010507d3746adc126d719005981e05Hal Finkel DenseSet<ValuePair> &FixedOrderPairs, 26872465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 26972465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 27072465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairDeps); 27172465ea23d010507d3746adc126d719005981e05Hal Finkel 272de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 273de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); 274de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 275de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool areInstsCompatible(Instruction *I, Instruction *J, 27665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel bool IsSimpleLoadStore, bool NonPow2Len, 277a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel int &CostSavings, int &FixedOrder); 278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 
279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool trackUsesOfI(DenseSet<Value *> &Users, 280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker &WriteSet, Instruction *I, 281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, bool UpdateUsers = true, 2822f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> *LoadMoveSetPairs = 0); 2831230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 284de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void computePairsConnectedTo( 285de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 28600f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 287de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 28972465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 290de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePair P); 291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 292de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool pairsConflict(ValuePair P, ValuePair Q, 293de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 294da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0, 295da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel DenseSet<VPPair> *PairableInstUserPairSet = 0); 296de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool pairWillFormCycle(ValuePair P, 298de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUsers, 299de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &CurrentPairs); 300de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 
301de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void pruneTreeFor( 302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 303de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 304de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 305de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 306de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 307da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel DenseSet<VPPair> &PairableInstUserPairSet, 308de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 309de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, 310de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PrunedTree, ValuePair J, 311de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck); 312de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void buildInitialTreeFor( 314de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 315b1a82589339fed148c12b052d30861a539552f1aHal Finkel DenseSet<ValuePair> &CandidatePairsSet, 316de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 317de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 318de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 319de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 320de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, ValuePair J); 321de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 322de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void findBestTreeFor( 
323de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 324b1a82589339fed148c12b052d30861a539552f1aHal Finkel DenseSet<ValuePair> &CandidatePairsSet, 32565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 326de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 32786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 32886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 329de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 33086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, 331de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 332de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 333da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel DenseSet<VPPair> &PairableInstUserPairSet, 334de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 335de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, 33665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel int &BestEffSize, VPIteratorPair ChoiceRange, 337de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck); 338de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 339de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, 340202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *J, unsigned o); 341de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void fillNewShuffleMask(LLVMContext& Context, Instruction *J, 34364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaskOffset, 
unsigned NumInElem, 34464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElem1, unsigned IdxOffset, 34564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> &Mask); 346de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I, 348de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J); 349de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 35064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J, 35164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned o, Value *&LOp, unsigned numElemL, 35272465ea23d010507d3746adc126d719005981e05Hal Finkel Type *ArgTypeL, Type *ArgTypeR, bool IBeforeJ, 35364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned IdxOff = 0); 35464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 355de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementInput(LLVMContext& Context, Instruction *I, 35672465ea23d010507d3746adc126d719005981e05Hal Finkel Instruction *J, unsigned o, bool IBeforeJ); 357de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 358de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, 35972465ea23d010507d3746adc126d719005981e05Hal Finkel Instruction *J, SmallVector<Value *, 3> &ReplacedOperands, 36072465ea23d010507d3746adc126d719005981e05Hal Finkel bool IBeforeJ); 361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, 363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, Instruction *K, 364de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, Instruction *&K1, 365202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *&K2); 
366de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void collectPairLoadMoveSet(BasicBlock &BB, 368de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 369de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 3702f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 371de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I); 372de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void collectLoadMoveSet(BasicBlock &BB, 374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 3762f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 3772f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs); 378de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool canMoveUsesOfIAfterJ(BasicBlock &BB, 3802f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 381de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J); 382de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 383de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void moveUsesOfIAfterJ(BasicBlock &BB, 3842f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 385de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 386de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J); 387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 388ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel void combineMetadata(Instruction *K, const Instruction *J); 389ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 
39087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng bool vectorizeBB(BasicBlock &BB) { 391e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel if (!DT->isReachableFromEntry(&BB)) { 392e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DEBUG(dbgs() << "BBV: skipping unreachable " << BB.getName() << 393e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel " in " << BB.getParent()->getName() << "\n"); 394e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel return false; 395e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel } 396e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel 397abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth DEBUG(if (TTI) dbgs() << "BBV: using target information\n"); 39865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 399de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool changed = false; 400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate a sufficient number of times to merge types of size 1 bit, 401de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // then 2 bits, then 4, etc. up to half of the target vector width of the 402de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // target vector register. 
40364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned n = 1; 40464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 2; 405abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth (TTI || v <= Config.VectorBits) && 40665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel (!Config.MaxIter || n <= Config.MaxIter); 407de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel v *= 2, ++n) { 408bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng DEBUG(dbgs() << "BBV: fusing loop #" << n << 409de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " for " << BB.getName() << " in " << 410de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BB.getParent()->getName() << "...\n"); 411de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (vectorizePairs(BB)) 412de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel changed = true; 413de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 414de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 415de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 416de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 41764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (changed && !Pow2LenOnly) { 41864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ++n; 41964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; !Config.MaxIter || n <= Config.MaxIter; ++n) { 42064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " << 42164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel n << " for " << BB.getName() << " in " << 42264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel BB.getParent()->getName() << "...\n"); 42364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!vectorizePairs(BB, true)) break; 42464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 42564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 42664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 427de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: done!\n"); 
428de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return changed; 429de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 430de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 43187825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng virtual bool runOnBasicBlock(BasicBlock &BB) { 43287825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng AA = &getAnalysis<AliasAnalysis>(); 433e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DT = &getAnalysis<DominatorTree>(); 43487825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng SE = &getAnalysis<ScalarEvolution>(); 4353574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow TD = getAnalysisIfAvailable<DataLayout>(); 4368bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler Carruth TTI = IgnoreTargetInfo ? 0 : &getAnalysis<TargetTransformInfo>(); 43787825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 43887825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng return vectorizeBB(BB); 43987825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng } 44087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 441de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel virtual void getAnalysisUsage(AnalysisUsage &AU) const { 442de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlockPass::getAnalysisUsage(AU); 443de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addRequired<AliasAnalysis>(); 444e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel AU.addRequired<DominatorTree>(); 445de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addRequired<ScalarEvolution>(); 4468bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler Carruth AU.addRequired<TargetTransformInfo>(); 447de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addPreserved<AliasAnalysis>(); 448e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel AU.addPreserved<DominatorTree>(); 449de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addPreserved<ScalarEvolution>(); 4507e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel AU.setPreservesCFG(); 451de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal 
Finkel } 452de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 45364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) { 45464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() && 45564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel "Cannot form vector from incompatible scalar types"); 45664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *STy = ElemTy->getScalarType(); 45764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 45864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem; 459de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) { 46064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem = VTy->getNumElements(); 46164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 46264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem = 1; 46364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 46464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 46564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) { 46664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem += VTy->getNumElements(); 46764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 46864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem += 1; 469de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 4707e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel 47164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return VectorType::get(STy, numElem); 47264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 47364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 47464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel static inline void getInstructionTypes(Instruction *I, 47564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *&T1, Type *&T2) { 4763fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 
47764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // For stores, it is the value type, not the pointer type that matters 47864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // because the value is what will come from a vector register. 47964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 4803fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel Value *IVal = SI->getValueOperand(); 48164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T1 = IVal->getType(); 48264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 48364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T1 = I->getType(); 48464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 48564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 4863fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel if (CastInst *CI = dyn_cast<CastInst>(I)) 4873fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel T2 = CI->getSrcTy(); 48864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 48964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T2 = T1; 49065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 49165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 49265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel T2 = SI->getCondition()->getType(); 4938b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) { 4948b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel T2 = SI->getOperand(0)->getType(); 4955094257518ea7b615d87ef5bea657625ffa81991Hal Finkel } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) { 4965094257518ea7b615d87ef5bea657625ffa81991Hal Finkel T2 = CI->getOperand(0)->getType(); 49765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 498de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 499de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 500de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the weight associated with the provided value. 
A chain of 501de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // candidate pairs has a length given by the sum of the weights of its 502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // members (one weight per pair; the weight of each member of the pair 503de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is assumed to be the same). This length is then compared to the 504de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // chain-length threshold to determine if a given chain is significant 505de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // enough to be vectorized. The length is also used in comparing 506de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // candidate chains where longer chains are considered to be better. 507de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: when this function returns 0, the resulting instructions are 508de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // not actually fused. 509bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng inline size_t getDepthFactor(Value *V) { 510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // InsertElement and ExtractElement have a depth factor of zero. This is 511de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // for two reasons: First, they cannot be usefully fused. Second, because 512de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the pass generates a lot of these, they can confuse the simple metric 513de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // used to compare the trees in the next iteration. Thus, giving them a 514de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // weight of zero allows the pass to essentially ignore them in 515de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // subsequent iterations when looking for vectorization opportunities 516de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // while still tracking dependency chains that flow through those 517de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instructions. 
518de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V)) 519de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return 0; 520de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 521edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel // Give a load or store half of the required depth so that load/store 522edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel // pairs will vectorize. 523bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V))) 524bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng return Config.ReqChainDepth/2; 525edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel 526de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return 1; 527de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 528de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 529abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth // Returns the cost of the provided instruction using TTI. 53046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // This does not handle loads and stores. 53146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) { 53246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel switch (Opcode) { 53346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel default: break; 53446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::GetElementPtr: 53546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // We mark this instruction as zero-cost because scalar GEPs are usually 53646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // lowered to the intruction addressing mode. At the moment we don't 53746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // generate vector GEPs. 
53846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return 0; 53946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Br: 540abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth return TTI->getCFInstrCost(Opcode); 54146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::PHI: 54246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return 0; 54346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Add: 54446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FAdd: 54546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Sub: 54646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FSub: 54746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Mul: 54846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FMul: 54946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::UDiv: 55046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SDiv: 55146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FDiv: 55246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::URem: 55346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SRem: 55446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FRem: 55546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Shl: 55646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::LShr: 55746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::AShr: 55846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::And: 55946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Or: 56046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Xor: 561abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth return TTI->getArithmeticInstrCost(Opcode, T1); 56246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Select: 
56346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::ICmp: 56446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FCmp: 565abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth return TTI->getCmpSelInstrCost(Opcode, T1, T2); 56646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::ZExt: 56746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SExt: 56846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPToUI: 56946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPToSI: 57046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPExt: 57146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::PtrToInt: 57246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::IntToPtr: 57346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SIToFP: 57446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::UIToFP: 57546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Trunc: 57646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPTrunc: 57746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::BitCast: 57886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel case Instruction::ShuffleVector: 579abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth return TTI->getCastInstrCost(Opcode, T1, T2); 58046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel } 58146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel 58246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return 1; 58346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel } 58446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel 585de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This determines the relative offset of two loads or stores, returning 586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // true if the offset could be determined to be some constant value. 
587de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For example, if OffsetInElmts == 1, then J accesses the memory directly 588de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // after I; if OffsetInElmts == -1 then I accesses the memory 58964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // directly after J. 590de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool getPairPtrInfo(Instruction *I, Instruction *J, 591de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment, 59265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned &IAddressSpace, unsigned &JAddressSpace, 59393f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel int64_t &OffsetInElmts, bool ComputeOffset = true) { 594de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts = 0; 59565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 59665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel LoadInst *LJ = cast<LoadInst>(J); 59765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IPtr = LI->getPointerOperand(); 59865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JPtr = LJ->getPointerOperand(); 59965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAlignment = LI->getAlignment(); 60065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAlignment = LJ->getAlignment(); 60165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace = LI->getPointerAddressSpace(); 60265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAddressSpace = LJ->getPointerAddressSpace(); 603de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 60465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J); 60565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IPtr = SI->getPointerOperand(); 60665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JPtr = SJ->getPointerOperand(); 60765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAlignment = 
SI->getAlignment(); 60865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAlignment = SJ->getAlignment(); 60965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace = SI->getPointerAddressSpace(); 61065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAddressSpace = SJ->getPointerAddressSpace(); 611de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 612de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 61393f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel if (!ComputeOffset) 61493f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel return true; 61593f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel 616de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *IPtrSCEV = SE->getSCEV(IPtr); 617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *JPtrSCEV = SE->getSCEV(JPtr); 618de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 619de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If this is a trivial offset, then we'll get something like 620de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // 1*sizeof(type). With target data, which we need anyway, this will get 621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // constant folded into a number. 
622de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV); 623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (const SCEVConstant *ConstOffSCEV = 624de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dyn_cast<SCEVConstant>(OffsetSCEV)) { 625de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantInt *IntOff = ConstOffSCEV->getValue(); 626de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t Offset = IntOff->getSExtValue(); 627de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 628de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VTy = cast<PointerType>(IPtr->getType())->getElementType(); 629de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy); 630de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 63164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType(); 63264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (VTy != VTy2 && Offset < 0) { 63364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2); 63464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel OffsetInElmts = Offset/VTy2TSS; 63564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return (abs64(Offset) % VTy2TSS) == 0; 63664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 638de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts = Offset/VTyTSS; 639de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (abs64(Offset) % VTyTSS) == 0; 640de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 641de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 642de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 643de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 644de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 645de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if the provided 
CallInst represents an intrinsic that can 646de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // be vectorized. 647de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isVectorizableIntrinsic(CallInst* I) { 648de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Function *F = I->getCalledFunction(); 649de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!F) return false; 650de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 651a77728415857196035c0090f7b2749d7971811a2Hal Finkel Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID(); 652de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!IID) return false; 653de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 654de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel switch(IID) { 655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel default: 656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::sqrt: 658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::powi: 659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::sin: 660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::cos: 661de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log: 662de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log2: 663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log10: 664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::exp: 665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::exp2: 666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::pow: 66786312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng return Config.VectorizeMath; 668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::fma: 66964a7a24edf719bb6ffacc030c23f4cd99312f3fbHal Finkel case Intrinsic::fmuladd: 67086312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng return Config.VectorizeFMA; 
671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 672de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 673de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 674b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel bool isPureIEChain(InsertElementInst *IE) { 675b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel InsertElementInst *IENext = IE; 676b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel do { 677b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel if (!isa<UndefValue>(IENext->getOperand(0)) && 678b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel !isa<InsertElementInst>(IENext->getOperand(0))) { 679b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel return false; 680b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel } 681b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel } while ((IENext = 682b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel dyn_cast<InsertElementInst>(IENext->getOperand(0)))); 683b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel 684b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel return true; 685b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel } 686de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel }; 687de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 688de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function implements one vectorization iteration on the provided 689de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // basic block. It returns true if the block is changed. 
69064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) { 6915d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool ShouldContinue; 6925d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator Start = BB.getFirstInsertionPt(); 6935d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 6945d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::vector<Value *> AllPairableInsts; 6955d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseMap<Value *, Value *> AllChosenPairs; 696a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> AllFixedOrderPairs; 69772465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> AllPairConnectionTypes; 69872465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> AllConnectedPairs, AllConnectedPairDeps; 6995d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 7005d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel do { 7015d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::vector<Value *> PairableInsts; 7025d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::multimap<Value *, Value *> CandidatePairs; 703a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> FixedOrderPairs; 70465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> CandidatePairCostSavings; 7055d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, 706a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrderPairs, 70765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CandidatePairCostSavings, 70864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel PairableInsts, NonPow2Len); 7095d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (PairableInsts.empty()) continue; 7103706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 711b1a82589339fed148c12b052d30861a539552f1aHal Finkel // Build the candidate pair set for faster lookups. 
712b1a82589339fed148c12b052d30861a539552f1aHal Finkel DenseSet<ValuePair> CandidatePairsSet; 713b1a82589339fed148c12b052d30861a539552f1aHal Finkel for (std::multimap<Value *, Value *>::iterator I = CandidatePairs.begin(), 714b1a82589339fed148c12b052d30861a539552f1aHal Finkel E = CandidatePairs.end(); I != E; ++I) 715b1a82589339fed148c12b052d30861a539552f1aHal Finkel CandidatePairsSet.insert(*I); 716b1a82589339fed148c12b052d30861a539552f1aHal Finkel 7175d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Now we have a map of all of the pairable instructions and we need to 7185d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // select the best possible pairing. A good pairing is one such that the 7195d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // users of the pair are also paired. This defines a (directed) forest 72094c22716d60ff5edf6a98a3c67e0faa001be1142Sylvestre Ledru // over the pairs such that two pairs are connected iff the second pair 7215d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // uses the first. 7223706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 7235d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Note that it only matters that both members of the second pair use some 7245d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // element of the first pair (to allow for splatting). 
7253706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 72672465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> ConnectedPairs, ConnectedPairDeps; 72772465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> PairConnectionTypes; 728b1a82589339fed148c12b052d30861a539552f1aHal Finkel computeConnectedPairs(CandidatePairs, CandidatePairsSet, 729b1a82589339fed148c12b052d30861a539552f1aHal Finkel PairableInsts, ConnectedPairs, PairConnectionTypes); 7305d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ConnectedPairs.empty()) continue; 7313706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 73272465ea23d010507d3746adc126d719005981e05Hal Finkel for (std::multimap<ValuePair, ValuePair>::iterator 73372465ea23d010507d3746adc126d719005981e05Hal Finkel I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); 73472465ea23d010507d3746adc126d719005981e05Hal Finkel I != IE; ++I) { 73572465ea23d010507d3746adc126d719005981e05Hal Finkel ConnectedPairDeps.insert(VPPair(I->second, I->first)); 73672465ea23d010507d3746adc126d719005981e05Hal Finkel } 73772465ea23d010507d3746adc126d719005981e05Hal Finkel 7385d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Build the pairable-instruction dependency map 7395d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseSet<ValuePair> PairableInstUsers; 7405d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers); 7413706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 74235564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // There is now a graph of the connected pairs. For each variable, pick 74335564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // the pairing with the largest tree meeting the depth requirement on at 74435564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // least one branch. 
Then select all pairings that are part of that tree 74535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // and remove them from the list of available pairings and pairable 74635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // variables. 7473706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 7485d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseMap<Value *, Value *> ChosenPairs; 749b1a82589339fed148c12b052d30861a539552f1aHal Finkel choosePairs(CandidatePairs, CandidatePairsSet, 750b1a82589339fed148c12b052d30861a539552f1aHal Finkel CandidatePairCostSavings, 75186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel PairableInsts, FixedOrderPairs, PairConnectionTypes, 75286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ConnectedPairs, ConnectedPairDeps, 7535d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel PairableInstUsers, ChosenPairs); 7543706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 7555d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ChosenPairs.empty()) continue; 7565d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(), 7575d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel PairableInsts.end()); 7585d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end()); 759a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel 76072465ea23d010507d3746adc126d719005981e05Hal Finkel // Only for the chosen pairs, propagate information on fixed-order pairs, 76172465ea23d010507d3746adc126d719005981e05Hal Finkel // pair connections, and their types to the data structures used by the 76272465ea23d010507d3746adc126d719005981e05Hal Finkel // pair fusion procedures. 
763a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel for (DenseMap<Value *, Value *>::iterator I = ChosenPairs.begin(), 764a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel IE = ChosenPairs.end(); I != IE; ++I) { 765a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel if (FixedOrderPairs.count(*I)) 766a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel AllFixedOrderPairs.insert(*I); 767a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel else if (FixedOrderPairs.count(ValuePair(I->second, I->first))) 768a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel AllFixedOrderPairs.insert(ValuePair(I->second, I->first)); 76972465ea23d010507d3746adc126d719005981e05Hal Finkel 77072465ea23d010507d3746adc126d719005981e05Hal Finkel for (DenseMap<Value *, Value *>::iterator J = ChosenPairs.begin(); 77172465ea23d010507d3746adc126d719005981e05Hal Finkel J != IE; ++J) { 77272465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned>::iterator K = 77372465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.find(VPPair(*I, *J)); 77472465ea23d010507d3746adc126d719005981e05Hal Finkel if (K != PairConnectionTypes.end()) { 77572465ea23d010507d3746adc126d719005981e05Hal Finkel AllPairConnectionTypes.insert(*K); 77672465ea23d010507d3746adc126d719005981e05Hal Finkel } else { 77772465ea23d010507d3746adc126d719005981e05Hal Finkel K = PairConnectionTypes.find(VPPair(*J, *I)); 77872465ea23d010507d3746adc126d719005981e05Hal Finkel if (K != PairConnectionTypes.end()) 77972465ea23d010507d3746adc126d719005981e05Hal Finkel AllPairConnectionTypes.insert(*K); 78072465ea23d010507d3746adc126d719005981e05Hal Finkel } 78172465ea23d010507d3746adc126d719005981e05Hal Finkel } 78272465ea23d010507d3746adc126d719005981e05Hal Finkel } 78372465ea23d010507d3746adc126d719005981e05Hal Finkel 78472465ea23d010507d3746adc126d719005981e05Hal Finkel for (std::multimap<ValuePair, ValuePair>::iterator 78572465ea23d010507d3746adc126d719005981e05Hal Finkel I = ConnectedPairs.begin(), IE = 
ConnectedPairs.end(); 78672465ea23d010507d3746adc126d719005981e05Hal Finkel I != IE; ++I) { 78772465ea23d010507d3746adc126d719005981e05Hal Finkel if (AllPairConnectionTypes.count(*I)) { 78872465ea23d010507d3746adc126d719005981e05Hal Finkel AllConnectedPairs.insert(*I); 78972465ea23d010507d3746adc126d719005981e05Hal Finkel AllConnectedPairDeps.insert(VPPair(I->second, I->first)); 79072465ea23d010507d3746adc126d719005981e05Hal Finkel } 791a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel } 7925d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } while (ShouldContinue); 7935d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 7945d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (AllChosenPairs.empty()) return false; 7955d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel NumFusedOps += AllChosenPairs.size(); 7963706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 797de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A set of pairs has now been selected. It is now necessary to replace the 798de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // paired instructions with vector instructions. For this procedure each 79943ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop // operand must be replaced with a vector operand. This vector is formed 800de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // by using build_vector on the old operands. The replaced values are then 801de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // replaced with a vector_extract on the result. Subsequent optimization 802de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // passes should coalesce the build/extract combinations. 
8033706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 80472465ea23d010507d3746adc126d719005981e05Hal Finkel fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs, 80572465ea23d010507d3746adc126d719005981e05Hal Finkel AllPairConnectionTypes, 80672465ea23d010507d3746adc126d719005981e05Hal Finkel AllConnectedPairs, AllConnectedPairDeps); 80764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 80864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // It is important to cleanup here so that future iterations of this 80964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // function have less work to do. 8108e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo()); 811de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 812de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 813de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 814de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function returns true if the provided instruction is capable of being 815de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // fused into a vector instruction. This determination is based only on the 816de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // type and other attributes of the instruction. 
817de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::isInstVectorizable(Instruction *I, 818de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool &IsSimpleLoadStore) { 819de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = false; 820de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 821de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (CallInst *C = dyn_cast<CallInst>(I)) { 822de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isVectorizableIntrinsic(C)) 823de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 824de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (LoadInst *L = dyn_cast<LoadInst>(I)) { 825de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Vectorize simple loads if possbile: 826de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = L->isSimple(); 82786312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!IsSimpleLoadStore || !Config.VectorizeMemOps) 828de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 829de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (StoreInst *S = dyn_cast<StoreInst>(I)) { 830de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Vectorize simple stores if possbile: 831de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = S->isSimple(); 83286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!IsSimpleLoadStore || !Config.VectorizeMemOps) 833de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 834de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (CastInst *C = dyn_cast<CastInst>(I)) { 835de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // We can vectorize casts, but not casts of pointer types, etc. 
83686312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!Config.VectorizeCasts) 837de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 838de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 839de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *SrcTy = C->getSrcTy(); 840f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!SrcTy->isSingleValueType()) 841de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 842de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 843de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *DestTy = C->getDestTy(); 844f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!DestTy->isSingleValueType()) 845de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 846fc3665c87519850f629c9565535e3be447e10addHal Finkel } else if (isa<SelectInst>(I)) { 847fc3665c87519850f629c9565535e3be447e10addHal Finkel if (!Config.VectorizeSelect) 848fc3665c87519850f629c9565535e3be447e10addHal Finkel return false; 849e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel } else if (isa<CmpInst>(I)) { 850e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel if (!Config.VectorizeCmp) 851e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel return false; 852f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) { 853f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!Config.VectorizeGEP) 854f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 855f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 856f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel // Currently, vector GEPs exist only with one index. 
857f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (G->getNumIndices() != 1) 858f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 859de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) || 860de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) { 861de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 862de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 863de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 864de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // We can't vectorize memory operations without target data 865de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (TD == 0 && IsSimpleLoadStore) 866de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 867de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 868de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *T1, *T2; 86964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(I, T1, T2); 870de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 871de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Not every type can be vectorized... 
872de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) || 873de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel !(VectorType::isValidElementType(T2) || T2->isVectorTy())) 874de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 875de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 87665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (T1->getScalarSizeInBits() == 1) { 877768edf3cd037aab10391abc279f71470df8e3156Hal Finkel if (!Config.VectorizeBools) 878768edf3cd037aab10391abc279f71470df8e3156Hal Finkel return false; 879768edf3cd037aab10391abc279f71470df8e3156Hal Finkel } else { 88065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!Config.VectorizeInts && T1->isIntOrIntVectorTy()) 881768edf3cd037aab10391abc279f71470df8e3156Hal Finkel return false; 882768edf3cd037aab10391abc279f71470df8e3156Hal Finkel } 88365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 88465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (T2->getScalarSizeInBits() == 1) { 88565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!Config.VectorizeBools) 88665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 88765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } else { 88865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!Config.VectorizeInts && T2->isIntOrIntVectorTy()) 88965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 89065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 89165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 89286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!Config.VectorizeFloats 89386312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) 894de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 895de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 896e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel // Don't vectorize target-specific types. 
897e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel if (T1->isX86_FP80Ty() || T1->isPPC_FP128Ty() || T1->isX86_MMXTy()) 898e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel return false; 899e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy()) 900e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel return false; 901e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel 90205bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel if ((!Config.VectorizePointers || TD == 0) && 90305bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel (T1->getScalarType()->isPointerTy() || 90405bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel T2->getScalarType()->isPointerTy())) 905f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 906f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 907abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits || 908abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth T2->getPrimitiveSizeInBits() >= Config.VectorBits)) 909de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 910de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 911de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 912de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 913de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 914de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function returns true if the two provided instructions are compatible 915de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // (meaning that they can be fused into a vector instruction). This assumes 916de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that I has already been determined to be vectorizable and that J is not 917de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // in the use tree of I. 
918de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, 91965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel bool IsSimpleLoadStore, bool NonPow2Len, 920a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel int &CostSavings, int &FixedOrder) { 921de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << 922de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << "\n"); 923de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 92465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings = 0; 925a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrder = 0; 92665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 927de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Loads and stores can be merged if they have different alignments, 928de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but are otherwise the same. 92964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment | 93064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel (NonPow2Len ? 
Instruction::CompareUsingScalarTypes : 0))) 93164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return false; 93264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 93364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *IT1, *IT2, *JT1, *JT2; 93464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(I, IT1, IT2); 93564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(J, JT1, JT2); 93664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaxTypeBits = std::max( 93764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(), 93864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits()); 939abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (!TTI && MaxTypeBits > Config.VectorBits) 940de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 941ec4e85e3364f50802f2007e4b1e23661d4610366Hal Finkel 942de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: handle addsub-type operations! 
943de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 944de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsSimpleLoadStore) { 945de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *IPtr, *JPtr; 94665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; 947de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t OffsetInElmts = 0; 948de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, 94965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace, JAddressSpace, 950de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts) && abs64(OffsetInElmts) == 1) { 951a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrder = (int) OffsetInElmts; 95265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned BottomAlignment = IAlignment; 95365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (OffsetInElmts < 0) BottomAlignment = JAlignment; 95465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 95565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *aTypeI = isa<StoreInst>(I) ? 95665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel cast<StoreInst>(I)->getValueOperand()->getType() : I->getType(); 95765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *aTypeJ = isa<StoreInst>(J) ? 
95865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel cast<StoreInst>(J)->getValueOperand()->getType() : J->getType(); 95965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *VType = getVecTypeForPair(aTypeI, aTypeJ); 96064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 96165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (Config.AlignedOnly) { 962de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // An aligned load or store is possible only if the instruction 963de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // with the lower offset has an alignment suitable for the 964de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vector type. 9651230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 966de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned VecAlignment = TD->getPrefTypeAlignment(VType); 967de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (BottomAlignment < VecAlignment) 968de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 969de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 97065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 971abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (TTI) { 972abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned ICost = TTI->getMemoryOpCost(I->getOpcode(), aTypeI, 973abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth IAlignment, IAddressSpace); 974abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned JCost = TTI->getMemoryOpCost(J->getOpcode(), aTypeJ, 975abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth JAlignment, JAddressSpace); 976abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType, 977abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth BottomAlignment, 978abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth IAddressSpace); 9790cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel 9800cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel ICost += 
TTI->getAddressComputationCost(aTypeI); 9810cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel JCost += TTI->getAddressComputationCost(aTypeJ); 9820cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel VCost += TTI->getAddressComputationCost(VType); 9830cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel 98465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (VCost > ICost + JCost) 98565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 98682149a9106f221aa6a7271977c236b078e621f21Hal Finkel 987dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // We don't want to fuse to a type that will be split, even 98882149a9106f221aa6a7271977c236b078e621f21Hal Finkel // if the two input types will also be split and there is no other 989dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // associated cost. 990abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned VParts = TTI->getNumberOfParts(VType); 991dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel if (VParts > 1) 992dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel return false; 993dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel else if (!VParts && VCost == ICost + JCost) 99482149a9106f221aa6a7271977c236b078e621f21Hal Finkel return false; 99582149a9106f221aa6a7271977c236b078e621f21Hal Finkel 99665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings = ICost + JCost - VCost; 99765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 998de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 999de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 1000de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1001abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth } else if (TTI) { 100246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2); 100346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2); 100465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *VT1 = 
getVecTypeForPair(IT1, JT1), 100565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel *VT2 = getVecTypeForPair(IT2, JT2); 1006ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel 1007ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel // Note that this procedure is incorrect for insert and extract element 1008ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel // instructions (because combining these often results in a shuffle), 1009ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel // but this cost is ignored (because insert and extract element 1010ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel // instructions are assigned a zero depth factor and are not really 1011ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel // fused in general). 101246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2); 101365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 101465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (VCost > ICost + JCost) 101565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 101682149a9106f221aa6a7271977c236b078e621f21Hal Finkel 1017dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // We don't want to fuse to a type that will be split, even 101882149a9106f221aa6a7271977c236b078e621f21Hal Finkel // if the two input types will also be split and there is no other 1019dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // associated cost. 
1020abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned VParts1 = TTI->getNumberOfParts(VT1), 1021abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth VParts2 = TTI->getNumberOfParts(VT2); 10228b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel if (VParts1 > 1 || VParts2 > 1) 1023dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel return false; 10248b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel else if ((!VParts1 || !VParts2) && VCost == ICost + JCost) 102582149a9106f221aa6a7271977c236b078e621f21Hal Finkel return false; 102682149a9106f221aa6a7271977c236b078e621f21Hal Finkel 102765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings = ICost + JCost - VCost; 1028de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1029de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 10306173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // The powi intrinsic is special because only the first argument is 10316173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // vectorized, the second arguments must be equal. 
10326173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel CallInst *CI = dyn_cast<CallInst>(I); 10336173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Function *FI; 1034a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (CI && (FI = CI->getCalledFunction())) { 1035a77728415857196035c0090f7b2749d7971811a2Hal Finkel Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID(); 1036a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (IID == Intrinsic::powi) { 1037a77728415857196035c0090f7b2749d7971811a2Hal Finkel Value *A1I = CI->getArgOperand(1), 1038a77728415857196035c0090f7b2749d7971811a2Hal Finkel *A1J = cast<CallInst>(J)->getArgOperand(1); 1039a77728415857196035c0090f7b2749d7971811a2Hal Finkel const SCEV *A1ISCEV = SE->getSCEV(A1I), 1040a77728415857196035c0090f7b2749d7971811a2Hal Finkel *A1JSCEV = SE->getSCEV(A1J); 1041a77728415857196035c0090f7b2749d7971811a2Hal Finkel return (A1ISCEV == A1JSCEV); 1042a77728415857196035c0090f7b2749d7971811a2Hal Finkel } 1043a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1044abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (IID && TTI) { 1045a77728415857196035c0090f7b2749d7971811a2Hal Finkel SmallVector<Type*, 4> Tys; 1046a77728415857196035c0090f7b2749d7971811a2Hal Finkel for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) 1047a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.push_back(CI->getArgOperand(i)->getType()); 1048abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys); 1049a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1050a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.clear(); 1051a77728415857196035c0090f7b2749d7971811a2Hal Finkel CallInst *CJ = cast<CallInst>(J); 1052a77728415857196035c0090f7b2749d7971811a2Hal Finkel for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i) 1053a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.push_back(CJ->getArgOperand(i)->getType()); 
1054abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys); 1055a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1056a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.clear(); 1057a77728415857196035c0090f7b2749d7971811a2Hal Finkel assert(CI->getNumArgOperands() == CJ->getNumArgOperands() && 1058a77728415857196035c0090f7b2749d7971811a2Hal Finkel "Intrinsic argument counts differ"); 1059a77728415857196035c0090f7b2749d7971811a2Hal Finkel for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { 1060a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (IID == Intrinsic::powi && i == 1) 1061a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.push_back(CI->getArgOperand(i)->getType()); 1062a77728415857196035c0090f7b2749d7971811a2Hal Finkel else 1063a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(), 1064a77728415857196035c0090f7b2749d7971811a2Hal Finkel CJ->getArgOperand(i)->getType())); 1065a77728415857196035c0090f7b2749d7971811a2Hal Finkel } 1066a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1067a77728415857196035c0090f7b2749d7971811a2Hal Finkel Type *RetTy = getVecTypeForPair(IT1, JT1); 1068abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys); 1069a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1070a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (VCost > ICost + JCost) 1071a77728415857196035c0090f7b2749d7971811a2Hal Finkel return false; 1072a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1073a77728415857196035c0090f7b2749d7971811a2Hal Finkel // We don't want to fuse to a type that will be split, even 1074a77728415857196035c0090f7b2749d7971811a2Hal Finkel // if the two input types will also be split and there is no other 1075a77728415857196035c0090f7b2749d7971811a2Hal Finkel // associated cost. 
1076abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned RetParts = TTI->getNumberOfParts(RetTy); 1077a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (RetParts > 1) 1078a77728415857196035c0090f7b2749d7971811a2Hal Finkel return false; 1079a77728415857196035c0090f7b2749d7971811a2Hal Finkel else if (!RetParts && VCost == ICost + JCost) 1080a77728415857196035c0090f7b2749d7971811a2Hal Finkel return false; 1081a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1082a77728415857196035c0090f7b2749d7971811a2Hal Finkel for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { 1083a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (!Tys[i]->isVectorTy()) 1084a77728415857196035c0090f7b2749d7971811a2Hal Finkel continue; 1085a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1086abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned NumParts = TTI->getNumberOfParts(Tys[i]); 1087a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (NumParts > 1) 1088a77728415857196035c0090f7b2749d7971811a2Hal Finkel return false; 1089a77728415857196035c0090f7b2749d7971811a2Hal Finkel else if (!NumParts && VCost == ICost + JCost) 1090a77728415857196035c0090f7b2749d7971811a2Hal Finkel return false; 1091a77728415857196035c0090f7b2749d7971811a2Hal Finkel } 1092a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1093a77728415857196035c0090f7b2749d7971811a2Hal Finkel CostSavings = ICost + JCost - VCost; 1094a77728415857196035c0090f7b2749d7971811a2Hal Finkel } 10956173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } 10966173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel 1097de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 1098de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1099de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1100de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Figure out whether or not J uses I and update the users and write-set 1101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // structures associated with I. 
// Specifically, Users represents the set of
// instructions that depend on I. WriteSet represents the set
// of memory locations that are dependent on I. If UpdateUsers is true,
// and J uses I, then Users is updated to contain J and WriteSet is updated
// to contain any memory locations to which J writes. The function returns
// true if J uses I. By default, alias analysis is used to determine
// whether J reads from memory that overlaps with a location in WriteSet.
// If LoadMoveSetPairs is not null, then it is a previously-computed set of
// pairs in which the first member is the memory-based user instruction and
// the second member is the instruction to be compared with I; J is then
// considered to use I exactly when the pair <J, I> is in that set. So, if
// LoadMoveSetPairs is provided, then the alias analysis is not used. This is
// necessary because this function is called during the process of moving
// instructions during vectorization and the results of the alias analysis
// are not stable during that process.
1115de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users, 1116de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker &WriteSet, Instruction *I, 1117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, bool UpdateUsers, 11182f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> *LoadMoveSetPairs) { 1119de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UsesI = false; 1120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This instruction may already be marked as a user due, for example, to 1122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // being a member of a selected pair. 1123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (Users.count(J)) 1124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = true; 1125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1126de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!UsesI) 11277e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel for (User::op_iterator JU = J->op_begin(), JE = J->op_end(); 11287e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel JU != JE; ++JU) { 1129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *V = *JU; 1130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I == V || Users.count(V)) { 1131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = true; 1132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1133de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1134de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1135de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!UsesI && J->mayReadFromMemory()) { 11362f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel if (LoadMoveSetPairs) { 11372f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel UsesI = LoadMoveSetPairs->count(ValuePair(J, I)); 1138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal 
Finkel for (AliasSetTracker::iterator W = WriteSet.begin(), 1140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel WE = WriteSet.end(); W != WE; ++W) { 114138a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel if (W->aliasesUnknownInst(J, *AA)) { 114238a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel UsesI = true; 114338a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel break; 1144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1147de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1148de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1149de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI && UpdateUsers) { 1150de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (J->mayWriteToMemory()) WriteSet.add(J); 1151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Users.insert(J); 1152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return UsesI; 1155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function iterates over all instruction pairs in the provided 1158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // basic block and collects all candidate pairs for vectorization. 
11595d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool BBVectorize::getCandidatePairs(BasicBlock &BB, 11605d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator &Start, 1161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1162a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 116365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 116464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Value *> &PairableInsts, bool NonPow2Len) { 1165de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock::iterator E = BB.end(); 11665d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (Start == E) return false; 11675d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 11685d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool ShouldContinue = false, IAfterStart = false; 11695d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel for (BasicBlock::iterator I = Start++; I != E; ++I) { 11705d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (I == Start) IAfterStart = true; 11715d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool IsSimpleLoadStore; 1173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isInstVectorizable(I, IsSimpleLoadStore)) continue; 1174de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1175de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for an instruction with which to pair instruction *I... 
1176de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 1177de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 11785d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool JAfterStart = IAfterStart; 11795d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator J = llvm::next(I); 1180bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) { 11815d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (J == Start) JAfterStart = true; 11825d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Determine if J uses I, if so, exit the loop. 1184bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep); 1185bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.FastDep) { 1186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: For this heuristic to be effective, independent operations 1187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // must tend to be intermixed. This is likely to be true from some 1188de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // kinds of grouped loop unrolling (but not the generic LLVM pass), 1189de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but otherwise may require some kind of reordering pass. 
1190de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1191de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // When using fast dependency analysis, 1192de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // stop searching after first use: 1193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI) break; 1194de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI) continue; 1196de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1197de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // J does not use I, and comes before the first use of I, so it can be 1199de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // merged with I if the instructions are compatible. 1200a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel int CostSavings, FixedOrder; 120165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len, 1202a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel CostSavings, FixedOrder)) continue; 1203de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1204de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // J is a candidate for merging with I. 
1205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!PairableInsts.size() || 1206de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInsts[PairableInsts.size()-1] != I) { 1207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInsts.push_back(I); 1208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 12095d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.insert(ValuePair(I, J)); 1211abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (TTI) 121265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), 121365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings)); 12145d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1215a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel if (FixedOrder == 1) 1216a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrderPairs.insert(ValuePair(I, J)); 1217a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel else if (FixedOrder == -1) 1218a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrderPairs.insert(ValuePair(J, I)); 1219a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel 12205d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // The next call to this function must start after the last instruction 12215d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // selected during this invocation. 
12225d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (JAfterStart) { 12235d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel Start = llvm::next(J); 12245d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel IAfterStart = JAfterStart = false; 12255d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } 12265d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1227de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair " 122865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel << *I << " <-> " << *J << " (cost savings: " << 122965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings << ")\n"); 12305d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 12315d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // If we have already found too many pairs, break here and this function 12325d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // will be called again starting after the last instruction selected 12335d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // during this invocation. 
1234bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (PairableInsts.size() >= Config.MaxInsts) { 12355d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel ShouldContinue = true; 12365d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel break; 12375d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } 1238de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 12395d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 12405d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ShouldContinue) 12415d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel break; 1242de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1243de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1244de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: found " << PairableInsts.size() 1245de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << " instructions with candidate pairs\n"); 12465d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 12475d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel return ShouldContinue; 1248de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1250de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Finds candidate pairs connected to the pair P = <PI, PJ>. This means that 1251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // it looks for pairs such that both members have an input which is an 1252de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // output of PI or PJ. 
1253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::computePairsConnectedTo( 1254de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 125500f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 1256de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1257de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 125872465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 1259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePair P) { 1260bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel StoreInst *SI, *SJ; 1261bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For each possible pairing for this variable, look at the uses of 1263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the first value... 1264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator I = P.first->use_begin(), 1265de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = P.first->use_end(); I != E; ++I) { 1266bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if (isa<LoadInst>(*I)) { 1267bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // A pair cannot be connected to a load because the load only takes one 1268bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // operand (the address) and it is a scalar even after vectorization. 1269bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1270bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel } else if ((SI = dyn_cast<StoreInst>(*I)) && 1271bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.first == SI->getPointerOperand()) { 1272bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // Similarly, a pair cannot be connected to a store through its 1273bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // pointer operand. 
1274bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1275bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel } 1276bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1277de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For each use of the first variable, look for uses of the second 1278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // variable... 1279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.second->use_begin(), 1280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = P.second->use_end(); J != E2; ++J) { 1281bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 1282bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SJ->getPointerOperand()) 1283bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1284bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1285de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for <I, J>: 128600f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel if (CandidatePairsSet.count(ValuePair(*I, *J))) { 128772465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*I, *J)); 128872465ea23d010507d3746adc126d719005981e05Hal Finkel ConnectedPairs.insert(VP); 128972465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect)); 129072465ea23d010507d3746adc126d719005981e05Hal Finkel } 1291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1292de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for <J, I>: 129300f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel if (CandidatePairsSet.count(ValuePair(*J, *I))) { 129472465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*J, *I)); 129572465ea23d010507d3746adc126d719005981e05Hal Finkel ConnectedPairs.insert(VP); 129672465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap)); 129772465ea23d010507d3746adc126d719005981e05Hal Finkel } 
1298de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1299de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1300bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.SplatBreaksChain) continue; 1301de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for cases where just the first value in the pair is used by 1302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both members of another pair (splatting). 1303de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) { 1304bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 1305bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.first == SJ->getPointerOperand()) 1306bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1307bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 130800f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel if (CandidatePairsSet.count(ValuePair(*I, *J))) { 130972465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*I, *J)); 131072465ea23d010507d3746adc126d719005981e05Hal Finkel ConnectedPairs.insert(VP); 131172465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); 131272465ea23d010507d3746adc126d719005981e05Hal Finkel } 1313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1314de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1315de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1316bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.SplatBreaksChain) return; 1317de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for cases where just the second value in the pair is used by 1318de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both members of another pair (splatting). 
1319de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator I = P.second->use_begin(), 1320de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = P.second->use_end(); I != E; ++I) { 1321bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if (isa<LoadInst>(*I)) 1322bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1323bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel else if ((SI = dyn_cast<StoreInst>(*I)) && 1324bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SI->getPointerOperand()) 1325bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1326bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1327de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) { 1328bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 1329bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SJ->getPointerOperand()) 1330bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1331bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 133200f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel if (CandidatePairsSet.count(ValuePair(*I, *J))) { 133372465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*I, *J)); 133472465ea23d010507d3746adc126d719005981e05Hal Finkel ConnectedPairs.insert(VP); 133572465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); 133672465ea23d010507d3746adc126d719005981e05Hal Finkel } 1337de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1338de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1339de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1340de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1341de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function figures out which pairs are connected. 
Two pairs are 1342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // connected if some output of the first pair forms an input to both members 1343de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // of the second pair. 1344de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::computeConnectedPairs( 1345de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1346b1a82589339fed148c12b052d30861a539552f1aHal Finkel DenseSet<ValuePair> &CandidatePairsSet, 1347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 134872465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 134972465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes) { 1350de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator PI = PairableInsts.begin(), 1351de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PE = PairableInsts.end(); PI != PE; ++PI) { 1352de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI); 1353de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1354de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator P = choiceRange.first; 1355de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel P != choiceRange.second; ++P) 1356b1a82589339fed148c12b052d30861a539552f1aHal Finkel computePairsConnectedTo(CandidatePairs, CandidatePairsSet, 1357b1a82589339fed148c12b052d30861a539552f1aHal Finkel PairableInsts, ConnectedPairs, 1358b1a82589339fed148c12b052d30861a539552f1aHal Finkel PairConnectionTypes, *P); 1359de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1360de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size() 1362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << " pair 
connections.\n"); 1363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1364de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1365de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function builds a set of use tuples such that <A, B> is in the set 1366de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // if B is in the use tree of A. If B is in the use tree of A, then B 1367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // depends on the output of A. 1368de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::buildDepMap( 1369de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock &BB, 1370de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1371de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1372de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers) { 1373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> IsInPair; 1374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(), 1375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = CandidatePairs.end(); C != E; ++C) { 1376de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsInPair.insert(C->first); 1377de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsInPair.insert(C->second); 1378de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 13807a8aba906416b6998347b52c3c08610fdc190638Hal Finkel // Iterate through the basic block, recording all users of each 1381de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pairable instruction. 
1382de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1383de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock::iterator E = BB.end(); 1384de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { 1385de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsInPair.find(I) == IsInPair.end()) continue; 1386de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 1388de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 1389de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) 1390de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) trackUsesOfI(Users, WriteSet, I, J); 1391de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1392de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end(); 13938f3359a4b396d3f1a7b2726e02f199be74c62e4cHal Finkel U != E; ++U) { 13948f3359a4b396d3f1a7b2726e02f199be74c62e4cHal Finkel if (IsInPair.find(*U) == IsInPair.end()) continue; 1395de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.insert(ValuePair(I, *U)); 13968f3359a4b396d3f1a7b2726e02f199be74c62e4cHal Finkel } 1397de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1398de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1399de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if an input to pair P is an output of pair Q and also an 1401de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // input of pair Q is an output of pair P. If this is the case, then these 1402de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // two pairs cannot be simultaneously fused. 
1403de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q, 1404de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1405da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel std::multimap<ValuePair, ValuePair> *PairableInstUserMap, 1406da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel DenseSet<VPPair> *PairableInstUserPairSet) { 1407de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Two pairs are in conflict if they are mutual Users of eachother. 1408de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) || 1409de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.first, Q.second)) || 1410de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.second, Q.first)) || 1411de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.second, Q.second)); 1412de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) || 1413de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.first, P.second)) || 1414de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.second, P.first)) || 1415de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.second, P.second)); 1416de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (PairableInstUserMap) { 1417de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: The expensive part of the cycle check is not so much the cycle 1418de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // check itself but this edge insertion procedure. 
This needs some 1419de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // profiling and probably a different data structure (same is true of 1420de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // most uses of std::multimap). 1421de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (PUsesQ) { 1422da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel if (PairableInstUserPairSet->insert(VPPair(Q, P)).second) 1423de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUserMap->insert(VPPair(Q, P)); 1424de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1425de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (QUsesP) { 1426da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel if (PairableInstUserPairSet->insert(VPPair(P, Q)).second) 1427de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUserMap->insert(VPPair(P, Q)); 1428de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1429de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1430de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1431de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (QUsesP && PUsesQ); 1432de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1433de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1434de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function walks the use graph of current pairs to see if, starting 1435de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // from P, the walk returns to P. 
1436de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::pairWillFormCycle(ValuePair P, 1437de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &PairableInstUserMap, 1438de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &CurrentPairs) { 1439de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCycleCheck) 1440de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> " 1441de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *P.second << "\n"); 1442de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A lookup table of visisted pairs is kept because the PairableInstUserMap 1443de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // contains non-direct associations. 1444de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> Visited; 144535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePair, 32> Q; 1446de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1447de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(P); 144835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 144935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel ValuePair QTop = Q.pop_back_val(); 1450de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Visited.insert(QTop); 1451de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1452de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCycleCheck) 1453de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> " 1454de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *QTop.second << "\n"); 1455de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop); 1456de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first; 
1457de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C != QPairRange.second; ++C) { 1458de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C->second == P) { 1459de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() 1460de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << "BBV: rejected to prevent non-trivial cycle formation: " 1461de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *C->first.first << " <-> " << *C->first.second << "\n"); 1462de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 1463de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1464de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 14650b2500c504156c45cd71817a9ef6749b6cde5703David Blaikie if (CurrentPairs.count(C->second) && !Visited.count(C->second)) 1466de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(C->second); 1467de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 146835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1469de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1470de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 1471de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1472de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1473de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function builds the initial tree of connected pairs with the 1474de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair J at the root. 
1475de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::buildInitialTreeFor( 1476de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 1477b1a82589339fed148c12b052d30861a539552f1aHal Finkel DenseSet<ValuePair> &CandidatePairsSet, 1478de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1479de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 1480de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 1481de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1482de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> &Tree, ValuePair J) { 1483de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Each of these pairs is viewed as the root node of a Tree. The Tree 1484de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is then walked (depth-first). As this happens, we keep track of 1485de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the pairs that compose the Tree and the maximum depth of the Tree. 
148635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePairWithDepth, 32> Q; 1487de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1488de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); 148935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 1490de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePairWithDepth QTop = Q.back(); 1491de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1492de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Push each child onto the queue: 1493de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool MoreChildren = false; 1494de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t MaxChildDepth = QTop.second; 1495de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first); 1496478eed85f96f0d93da43e26cfb7fc6dee981c9aaNAKAMURA Takumi for (std::multimap<ValuePair, ValuePair>::iterator k = qtRange.first; 1497de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel k != qtRange.second; ++k) { 1498de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Make sure that this child pair is still a candidate: 1499b1a82589339fed148c12b052d30861a539552f1aHal Finkel if (CandidatePairsSet.count(ValuePair(k->second))) { 1500de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t>::iterator C = Tree.find(k->second); 1501de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C == Tree.end()) { 1502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t d = getDepthFactor(k->second.first); 1503de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(k->second, QTop.second+d)); 1504de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MoreChildren = true; 1505de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1506de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxChildDepth = std::max(MaxChildDepth, C->second); 
1507de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1508de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1509de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1511de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!MoreChildren) { 1512de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Record the current pair as part of the Tree: 1513de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth)); 1514de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.pop_back(); 1515de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 151635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1517de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1518de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1519de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Given some initial tree, prune it by removing conflicting pairs (pairs 1520de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that cannot be simultaneously chosen for vectorization). 
void BBVectorize::pruneTreeFor(
                    std::multimap<Value *, Value *> &CandidatePairs,
                    std::vector<Value *> &PairableInsts,
                    std::multimap<ValuePair, ValuePair> &ConnectedPairs,
                    DenseSet<ValuePair> &PairableInstUsers,
                    std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
                    DenseSet<VPPair> &PairableInstUserPairSet,
                    DenseMap<Value *, Value *> &ChosenPairs,
                    DenseMap<ValuePair, size_t> &Tree,
                    DenseSet<ValuePair> &PrunedTree, ValuePair J,
                    bool UseCycleCheck) {
  // Overview: starting from the root pair J, walk ConnectedPairs with an
  // explicit stack (Q). Every pair popped from Q is inserted into
  // PrunedTree. For each popped pair, its children that are present in Tree
  // are filtered against (1) the children already kept for this parent,
  // (2) PrunedTree, (3) the pairs still waiting in Q and (4) ChosenPairs;
  // the surviving children are pushed onto Q.
  //
  // When UseCycleCheck is true, PairableInstUserMap and
  // PairableInstUserPairSet are forwarded to pairsConflict and
  // pairWillFormCycle is consulted; otherwise null pointers are passed and
  // the cycle walk is skipped.
  //
  // CandidatePairs and PairableInsts are not referenced in this body.
  SmallVector<ValuePairWithDepth, 32> Q;
  // Depth-first traversal driven by an explicit stack. Note that a pair is
  // recorded in PrunedTree as soon as it is popped, before its children are
  // examined.
  Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
  do {
    ValuePairWithDepth QTop = Q.pop_back_val();
    PrunedTree.insert(QTop.first);

    // Visit each child, pruning as necessary...
    // BestChildren holds the children of QTop kept so far, each paired with
    // the value stored for it in Tree.
    SmallVector<ValuePairWithDepth, 8> BestChildren;
    VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first);
    for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first;
         K != QTopRange.second; ++K) {
      // Only children that made it into the unpruned Tree are considered.
      DenseMap<ValuePair, size_t>::iterator C = Tree.find(K->second);
      if (C == Tree.end()) continue;

      // This child is in the Tree, now we need to make sure it is the
      // best of any conflicting children. There could be multiple
      // conflicting children, so first, determine if we're keeping
      // this child, then delete conflicting children as necessary.

      // It is also necessary to guard against pairing-induced
      // dependencies. Consider instructions a .. x .. y .. b
      // such that (a,b) are to be fused and (x,y) are to be fused
      // but a is an input to x and b is an output from y. This
      // means that y cannot be moved after b but x must be moved
      // after b for (a,b) to be fused. In other words, after
      // fusing (a,b) we have y .. a/b .. x where y is an input
      // to a/b and x is an output to a/b: x and y can no longer
      // be legally fused. To prevent this condition, we must
      // make sure that a child pair added to the Tree is not
      // both an input and output of an already-selected pair.

      // Pairing-induced dependencies can also form from more complicated
      // cycles. The pair vs. pair conflicts are easy to check, and so
      // that is done explicitly for "fast rejection", and because for
      // child vs. child conflicts, we may prefer to keep the current
      // pair in preference to the already-selected child.

      // CurrentPairs accumulates the pairs examined by the scans below; it
      // is the set handed to pairWillFormCycle afterwards.
      DenseSet<ValuePair> CurrentPairs;

      bool CanAdd = true;
      // Scan 1: children already kept for this parent. A conflicting kept
      // child wins when its stored value is greater than or equal to this
      // child's (so ties favor the child kept first); otherwise it is only
      // noted in CurrentPairs here and removed further down.
      for (SmallVector<ValuePairWithDepth, 8>::iterator C2
            = BestChildren.begin(), E2 = BestChildren.end();
           C2 != E2; ++C2) {
        if (C2->first.first == C->first.first ||
            C2->first.first == C->first.second ||
            C2->first.second == C->first.first ||
            C2->first.second == C->first.second ||
            pairsConflict(C2->first, C->first, PairableInstUsers,
                          UseCycleCheck ? &PairableInstUserMap : 0,
                          UseCycleCheck ? &PairableInstUserPairSet : 0)) {
          if (C2->second >= C->second) {
            CanAdd = false;
            break;
          }

          CurrentPairs.insert(C2->first);
        }
      }
      if (!CanAdd) continue;

      // Even worse, this child could conflict with another node already
      // selected for the Tree. If that is the case, ignore this child.
      // Scan 2: non-conflicting members of PrunedTree are added to
      // CurrentPairs.
      for (DenseSet<ValuePair>::iterator T = PrunedTree.begin(),
           E2 = PrunedTree.end(); T != E2; ++T) {
        if (T->first == C->first.first ||
            T->first == C->first.second ||
            T->second == C->first.first ||
            T->second == C->first.second ||
            pairsConflict(*T, C->first, PairableInstUsers,
                          UseCycleCheck ? &PairableInstUserMap : 0,
                          UseCycleCheck ? &PairableInstUserPairSet : 0)) {
          CanAdd = false;
          break;
        }

        CurrentPairs.insert(*T);
      }
      if (!CanAdd) continue;

      // And check the queue too...
      // Scan 3: pairs still pending in Q; same rejection rule as scan 2.
      for (SmallVector<ValuePairWithDepth, 32>::iterator C2 = Q.begin(),
           E2 = Q.end(); C2 != E2; ++C2) {
        if (C2->first.first == C->first.first ||
            C2->first.first == C->first.second ||
            C2->first.second == C->first.first ||
            C2->first.second == C->first.second ||
            pairsConflict(C2->first, C->first, PairableInstUsers,
                          UseCycleCheck ? &PairableInstUserMap : 0,
                          UseCycleCheck ? &PairableInstUserPairSet : 0)) {
          CanAdd = false;
          break;
        }

        CurrentPairs.insert(C2->first);
      }
      if (!CanAdd) continue;

      // Last but not least, check for a conflict with any of the
      // already-chosen pairs.
      // Scan 4: unlike the scans above, this one relies on pairsConflict
      // alone and does not compare the pair members directly.
      for (DenseMap<Value *, Value *>::iterator C2 =
            ChosenPairs.begin(), E2 = ChosenPairs.end();
           C2 != E2; ++C2) {
        if (pairsConflict(*C2, C->first, PairableInstUsers,
                          UseCycleCheck ? &PairableInstUserMap : 0,
                          UseCycleCheck ? &PairableInstUserPairSet : 0)) {
          CanAdd = false;
          break;
        }

        CurrentPairs.insert(*C2);
      }
      if (!CanAdd) continue;

      // To check for non-trivial cycles formed by the addition of the
      // current pair we've formed a list of all relevant pairs, now use a
      // graph walk to check for a cycle. We start from the current pair and
      // walk the use tree to see if we again reach the current pair. If we
      // do, then the current pair is rejected.

      // FIXME: It may be more efficient to use a topological-ordering
      // algorithm to improve the cycle check. This should be investigated.
      if (UseCycleCheck &&
          pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs))
        continue;

      // This child can be added, but we may have chosen it in preference
      // to an already-selected child. Check for this here, and if a
      // conflict is found, then remove the previously-selected child
      // before adding this one in its place.
      // NOTE(review): this pairsConflict call passes only three arguments,
      // whereas the earlier scans pass five; presumably the trailing
      // parameters are defaulted -- confirm against the declaration.
      // The iterator is advanced manually because erase() returns the next
      // valid position.
      for (SmallVector<ValuePairWithDepth, 8>::iterator C2
            = BestChildren.begin(); C2 != BestChildren.end();) {
        if (C2->first.first == C->first.first ||
            C2->first.first == C->first.second ||
            C2->first.second == C->first.first ||
            C2->first.second == C->first.second ||
            pairsConflict(C2->first, C->first, PairableInstUsers))
          C2 = BestChildren.erase(C2);
        else
          ++C2;
      }

      BestChildren.push_back(ValuePairWithDepth(C->first, C->second));
    }

    // Schedule the surviving children. The depth carried on the stack is
    // the parent's stack depth plus the child's depth factor, not the value
    // stored in Tree.
    for (SmallVector<ValuePairWithDepth, 8>::iterator C
          = BestChildren.begin(), E2 = BestChildren.end();
         C != E2; ++C) {
      size_t DepthF = getDepthFactor(C->first.first);
      Q.push_back(ValuePairWithDepth(C->first, QTop.second+DepthF));
    }
  } while (!Q.empty());
}

// This function finds the best tree of mututally-compatible connected
// pairs, given the choice of root pairs as an iterator range.
void BBVectorize::findBestTreeFor(
                    std::multimap<Value *, Value *> &CandidatePairs,
                    DenseSet<ValuePair> &CandidatePairsSet,
                    DenseMap<ValuePair, int> &CandidatePairCostSavings,
                    std::vector<Value *> &PairableInsts,
                    DenseSet<ValuePair> &FixedOrderPairs,
                    DenseMap<VPPair, unsigned> &PairConnectionTypes,
                    std::multimap<ValuePair, ValuePair> &ConnectedPairs,
                    std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
                    DenseSet<ValuePair> &PairableInstUsers,
                    std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
                    DenseSet<VPPair> &PairableInstUserPairSet,
                    DenseMap<Value *, Value *> &ChosenPairs,
                    DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
                    int &BestEffSize,
VPIteratorPair ChoiceRange, 1702de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UseCycleCheck) { 1703de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first; 1704de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel J != ChoiceRange.second; ++J) { 1705de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1706de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Before going any further, make sure that this pair does not 1707de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflict with any already-selected pairs (see comment below 1708de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // near the Tree pruning for more details). 1709de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> ChosenPairSet; 1710de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool DoesConflict = false; 1711de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(), 1712de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = ChosenPairs.end(); C != E; ++C) { 1713de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (pairsConflict(*C, *J, PairableInstUsers, 1714da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? &PairableInstUserMap : 0, 1715da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? 
&PairableInstUserPairSet : 0)) { 1716de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DoesConflict = true; 1717de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1718de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1719de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1720de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairSet.insert(*C); 1721de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1722de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (DoesConflict) continue; 1723de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1724de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UseCycleCheck && 1725de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairWillFormCycle(*J, PairableInstUserMap, ChosenPairSet)) 1726de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1727de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1728de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<ValuePair, size_t> Tree; 1729b1a82589339fed148c12b052d30861a539552f1aHal Finkel buildInitialTreeFor(CandidatePairs, CandidatePairsSet, 1730b1a82589339fed148c12b052d30861a539552f1aHal Finkel PairableInsts, ConnectedPairs, 1731de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers, ChosenPairs, Tree, *J); 1732de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1733de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Because we'll keep the child with the largest depth, the largest 1734de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // depth is still the same in the unpruned Tree. 
1735de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t MaxDepth = Tree.lookup(*J); 1736de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1737de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {" 1738de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *J->first << " <-> " << *J->second << "} of depth " << 1739de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxDepth << " and size " << Tree.size() << "\n"); 1740de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1741de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // At this point the Tree has been constructed, but, may contain 1742de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // contradictory children (meaning that different children of 1743de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // some tree node may be attempting to fuse the same instruction). 1744de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // So now we walk the tree again, in the case of a conflict, 1745de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // keep only the child with the largest depth. To break a tie, 1746de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // favor the first child. 
1747de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1748de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> PrunedTree; 1749de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, 1750b1a82589339fed148c12b052d30861a539552f1aHal Finkel PairableInstUsers, PairableInstUserMap, 1751b1a82589339fed148c12b052d30861a539552f1aHal Finkel PairableInstUserPairSet, 1752da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel ChosenPairs, Tree, PrunedTree, *J, UseCycleCheck); 1753de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 175465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel int EffSize = 0; 1755abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (TTI) { 175678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DenseSet<Value *> PrunedTreeInstrs; 175778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), 175878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel E = PrunedTree.end(); S != E; ++S) { 175978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel PrunedTreeInstrs.insert(S->first); 176078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel PrunedTreeInstrs.insert(S->second); 176178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 176278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 176378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // The set of pairs that have already contributed to the total cost. 176478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DenseSet<ValuePair> IncomingPairs; 176578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 17664387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel // If the cost model were perfect, this might not be necessary; but we 17674387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel // need to make sure that we don't get stuck vectorizing our own 17684387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel // shuffle chains. 
17694387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel bool HasNontrivialInsts = false; 17704387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel 177186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // The node weights represent the cost savings associated with 177286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // fusing the pair of instructions. 177365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), 177465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel E = PrunedTree.end(); S != E; ++S) { 17754387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel if (!isa<ShuffleVectorInst>(S->first) && 17764387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel !isa<InsertElementInst>(S->first) && 17774387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel !isa<ExtractElementInst>(S->first)) 17784387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel HasNontrivialInsts = true; 17794387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel 178078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel bool FlipOrder = false; 178178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 178278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (getDepthFactor(S->first)) { 178378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib = CandidatePairCostSavings.find(*S)->second; 178478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tweight {" 178578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel << *S->first << " <-> " << *S->second << "} = " << 178678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib << "\n"); 178778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize += ESContrib; 178878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 178986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel 179078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // The edge weights contribute in a negative sense: they represent 179178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // the cost of shuffles. 
179286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel VPPIteratorPair IP = ConnectedPairDeps.equal_range(*S); 179386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel if (IP.first != ConnectedPairDeps.end()) { 179486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel unsigned NumDepsDirect = 0, NumDepsSwap = 0; 179586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; 179686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel Q != IP.second; ++Q) { 179778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (!PrunedTree.count(Q->second)) 179878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 179986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned>::iterator R = 180086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel PairConnectionTypes.find(VPPair(Q->second, Q->first)); 180186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel assert(R != PairConnectionTypes.end() && 180286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel "Cannot find pair connection type"); 180386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel if (R->second == PairConnectionDirect) 180486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ++NumDepsDirect; 180586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel else if (R->second == PairConnectionSwap) 180686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ++NumDepsSwap; 180786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel } 180886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel 180986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // If there are more swaps than direct connections, then 181086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // the pair order will be flipped during fusion. So the real 181186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // number of swaps is the minimum number. 
181278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel FlipOrder = !FixedOrderPairs.count(*S) && 181386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ((NumDepsSwap > NumDepsDirect) || 181486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel FixedOrderPairs.count(ValuePair(S->second, S->first))); 181586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel 181686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; 181786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel Q != IP.second; ++Q) { 181878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (!PrunedTree.count(Q->second)) 181978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 182086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned>::iterator R = 182186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel PairConnectionTypes.find(VPPair(Q->second, Q->first)); 182286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel assert(R != PairConnectionTypes.end() && 182386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel "Cannot find pair connection type"); 182486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel Type *Ty1 = Q->second.first->getType(), 182586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel *Ty2 = Q->second.second->getType(); 182686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel Type *VTy = getVecTypeForPair(Ty1, Ty2); 182786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel if ((R->second == PairConnectionDirect && FlipOrder) || 182886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel (R->second == PairConnectionSwap && !FlipOrder) || 182978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel R->second == PairConnectionSplat) { 183078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 183178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, VTy); 1832245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel 1833245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel if 
(VTy->getVectorNumElements() == 2) { 1834245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel if (R->second == PairConnectionSplat) 1835245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( 1836245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel TargetTransformInfo::SK_Broadcast, VTy)); 1837245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel else 1838245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( 1839245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel TargetTransformInfo::SK_Reverse, VTy)); 1840245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel } 1841245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel 184278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << 184378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *Q->second.first << " <-> " << *Q->second.second << 184478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel "} -> {" << 184578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S->first << " <-> " << *S->second << "} = " << 184678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib << "\n"); 184778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 184878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 184978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 185078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 185178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 185278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Compute the cost of outgoing edges. We assume that edges outgoing 185378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // to shuffles, inserts or extracts can be merged, and so contribute 185478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // no additional cost. 
185578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (!S->first->getType()->isVoidTy()) { 185678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *Ty1 = S->first->getType(), 185778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *Ty2 = S->second->getType(); 185878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *VTy = getVecTypeForPair(Ty1, Ty2); 185978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 186078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel bool NeedsExtraction = false; 186178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel for (Value::use_iterator I = S->first->use_begin(), 186278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel IE = S->first->use_end(); I != IE; ++I) { 186386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) { 186486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // Shuffle can be folded if it has no other input 186586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (isa<UndefValue>(SI->getOperand(1))) 186686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 186786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 186886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (isa<ExtractElementInst>(*I)) 186978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 187078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (PrunedTreeInstrs.count(*I)) 187178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 187278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel NeedsExtraction = true; 187378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel break; 187478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 187578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 187678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (NeedsExtraction) { 187778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib; 1878245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel if (Ty1->isVectorTy()) { 187978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = 
(int) getInstrCost(Instruction::ShuffleVector, 188078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Ty1, VTy); 1881245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( 1882245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel TargetTransformInfo::SK_ExtractSubvector, VTy, 0, Ty1)); 1883245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel } else 1884abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib = (int) TTI->getVectorInstrCost( 188578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::ExtractElement, VTy, 0); 188678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 188778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << 188878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S->first << "} = " << ESContrib << "\n"); 188978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 189078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 189178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 189278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel NeedsExtraction = false; 189378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel for (Value::use_iterator I = S->second->use_begin(), 189478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel IE = S->second->use_end(); I != IE; ++I) { 189586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) { 189686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // Shuffle can be folded if it has no other input 189786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (isa<UndefValue>(SI->getOperand(1))) 189886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 189986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 190086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (isa<ExtractElementInst>(*I)) 190178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 190278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if 
(PrunedTreeInstrs.count(*I)) 190378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 190478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel NeedsExtraction = true; 190578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel break; 190678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 190778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 190878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (NeedsExtraction) { 190978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib; 1910245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel if (Ty2->isVectorTy()) { 191178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 191278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Ty2, VTy); 1913245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( 1914245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel TargetTransformInfo::SK_ExtractSubvector, VTy, 1915245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel Ty1->isVectorTy() ? Ty1->getVectorNumElements() : 1, Ty2)); 1916245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel } else 1917abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib = (int) TTI->getVectorInstrCost( 191878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::ExtractElement, VTy, 1); 191978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << 192078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S->second << "} = " << ESContrib << "\n"); 192178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 192278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 192378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 192478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 192578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Compute the cost of incoming edges. 
192678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) { 192778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction *S1 = cast<Instruction>(S->first), 192878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S2 = cast<Instruction>(S->second); 192978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel for (unsigned o = 0; o < S1->getNumOperands(); ++o) { 193078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o); 193178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 193278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Combining constants into vector constants (or small vector 193378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // constants into larger ones are assumed free). 193478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (isa<Constant>(O1) && isa<Constant>(O2)) 193578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 193678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 193778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (FlipOrder) 193878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel std::swap(O1, O2); 193978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 194078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ValuePair VP = ValuePair(O1, O2); 194178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ValuePair VPR = ValuePair(O2, O1); 194278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 194378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Internal edges are not handled here. 
194478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (PrunedTree.count(VP) || PrunedTree.count(VPR)) 194578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 194678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 194778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *Ty1 = O1->getType(), 194878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *Ty2 = O2->getType(); 194978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *VTy = getVecTypeForPair(Ty1, Ty2); 195078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 195178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Combining vector operations of the same type is also assumed 195278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // folded with other operations. 195386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (Ty1 == Ty2) { 195486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // If both are insert elements, then both can be widened. 1955b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1), 1956b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel *IEO2 = dyn_cast<InsertElementInst>(O2); 1957b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2)) 195886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 195986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // If both are extract elements, and both have the same input 196086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // type, then they can be replaced with a shuffle 196186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1), 196286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel *EIO2 = dyn_cast<ExtractElementInst>(O2); 196386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (EIO1 && EIO2 && 196486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel EIO1->getOperand(0)->getType() == 196586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel 
EIO2->getOperand(0)->getType()) 196686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 196786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // If both are a shuffle with equal operand types and only two 196886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // unqiue operands, then they can be replaced with a single 196986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // shuffle 197086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1), 197186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel *SIO2 = dyn_cast<ShuffleVectorInst>(O2); 197286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (SIO1 && SIO2 && 197386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIO1->getOperand(0)->getType() == 197486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIO2->getOperand(0)->getType()) { 197586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SmallSet<Value *, 4> SIOps; 197686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO1->getOperand(0)); 197786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO1->getOperand(1)); 197886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO2->getOperand(0)); 197986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO2->getOperand(1)); 198086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (SIOps.size() <= 2) 198186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 198286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 198386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 198478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 198578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib; 198678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // This pair has already been formed. 
198778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (IncomingPairs.count(VP)) { 198878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 198978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (IncomingPairs.count(VPR)) { 199078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 199178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, VTy); 1992245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel 1993245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel if (VTy->getVectorNumElements() == 2) 1994245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( 1995245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel TargetTransformInfo::SK_Reverse, VTy)); 199678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) { 1997abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib = (int) TTI->getVectorInstrCost( 199878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, VTy, 0); 1999abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib += (int) TTI->getVectorInstrCost( 200078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, VTy, 1); 200178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (!Ty1->isVectorTy()) { 200278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // O1 needs to be inserted into a vector of size O2, and then 200378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // both need to be shuffled together. 
2004abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib = (int) TTI->getVectorInstrCost( 200578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, Ty2, 0); 200678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib += (int) getInstrCost(Instruction::ShuffleVector, 200778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, Ty2); 200878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (!Ty2->isVectorTy()) { 200978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // O2 needs to be inserted into a vector of size O1, and then 201078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // both need to be shuffled together. 2011abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib = (int) TTI->getVectorInstrCost( 201278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, Ty1, 0); 201378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib += (int) getInstrCost(Instruction::ShuffleVector, 201478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, Ty1); 201578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else { 201678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *TyBig = Ty1, *TySmall = Ty2; 201778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements()) 201878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel std::swap(TyBig, TySmall); 201978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 202078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 202178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, TyBig); 202278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (TyBig != TySmall) 202378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib += (int) getInstrCost(Instruction::ShuffleVector, 202478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel TyBig, TySmall); 202578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 
202678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 202778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" 202878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel << *O1 << " <-> " << *O2 << "} = " << 202978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib << "\n"); 203078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 203178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel IncomingPairs.insert(VP); 203286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel } 203386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel } 203465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 20354387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel 20364387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel if (!HasNontrivialInsts) { 20374387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel DEBUG(if (DebugPairSelection) dbgs() << 20384387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel "\tNo non-trivial instructions in tree;" 20394387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel " override to zero effective size\n"); 20404387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel EffSize = 0; 20414387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel } 204265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } else { 204365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), 204465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel E = PrunedTree.end(); S != E; ++S) 204565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel EffSize += (int) getDepthFactor(S->first); 204665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 2047de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2048de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugPairSelection) 2049de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: found pruned Tree for pair {" 2050de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *J->first << " <-> " << *J->second << "} of depth " << 
2051de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel MaxDepth << " and size " << PrunedTree.size() << 2052de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " (effective size: " << EffSize << ")\n"); 2053abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (((TTI && !UseChainDepthWithTI) || 205478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel MaxDepth >= Config.ReqChainDepth) && 205565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel EffSize > 0 && EffSize > BestEffSize) { 2056de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestMaxDepth = MaxDepth; 2057de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestEffSize = EffSize; 2058de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestTree = PrunedTree; 2059de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2060de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2061de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2062de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2063de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Given the list of candidate pairs, this function selects those 2064de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that will be fused into vector instructions. 
2065de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::choosePairs( 2066de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &CandidatePairs, 2067b1a82589339fed148c12b052d30861a539552f1aHal Finkel DenseSet<ValuePair> &CandidatePairsSet, 206865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 2069de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 207086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 207186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 2072de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 207386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, 2074de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers, 2075de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>& ChosenPairs) { 2076bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng bool UseCycleCheck = 2077bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck; 2078de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<ValuePair, ValuePair> PairableInstUserMap; 2079da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel DenseSet<VPPair> PairableInstUserPairSet; 2080de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator I = PairableInsts.begin(), 2081de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = PairableInsts.end(); I != E; ++I) { 2082de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The number of possible pairings for this variable: 2083de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t NumChoices = CandidatePairs.count(*I); 2084de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!NumChoices) 
continue; 2085de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2086de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I); 2087de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2088de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The best pair to choose and its tree: 208965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel size_t BestMaxDepth = 0; 209065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel int BestEffSize = 0; 2091de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> BestTree; 2092b1a82589339fed148c12b052d30861a539552f1aHal Finkel findBestTreeFor(CandidatePairs, CandidatePairsSet, 2093b1a82589339fed148c12b052d30861a539552f1aHal Finkel CandidatePairCostSavings, 209486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel PairableInsts, FixedOrderPairs, PairConnectionTypes, 209586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ConnectedPairs, ConnectedPairDeps, 2096da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel PairableInstUsers, PairableInstUserMap, 2097da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel PairableInstUserPairSet, ChosenPairs, 2098de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestTree, BestMaxDepth, BestEffSize, ChoiceRange, 2099de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck); 2100de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A tree has been chosen (or not) at this point. If no tree was 2102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // chosen, then this instruction, I, cannot be paired (and is no longer 2103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // considered). 
2104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (BestTree.size() > 0) 2106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: selected pairs in the best tree for: " 2107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *cast<Instruction>(*I) << "\n"); 2108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<ValuePair>::iterator S = BestTree.begin(), 2110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE2 = BestTree.end(); S != SE2; ++S) { 2111de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Insert the members of this tree into the list of chosen pairs. 2112de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.insert(ValuePair(S->first, S->second)); 2113de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " << 2114de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel *S->second << "\n"); 2115de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2116de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Remove all candidate pairs that have values in the chosen tree. 2117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator K = 2118de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.begin(); K != CandidatePairs.end();) { 2119de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (K->first == S->first || K->second == S->first || 2120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->second == S->second || K->first == S->second) { 2121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Don't remove the actual pair chosen so that it can be used 2122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // in subsequent tree selections. 
2123b1a82589339fed148c12b052d30861a539552f1aHal Finkel if (!(K->first == S->first && K->second == S->second)) { 2124b1a82589339fed148c12b052d30861a539552f1aHal Finkel CandidatePairsSet.erase(*K); 2125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CandidatePairs.erase(K++); 2126b1a82589339fed148c12b052d30861a539552f1aHal Finkel } else 2127de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++K; 2128de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++K; 2130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2133de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2134de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2135de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n"); 2136de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2137de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::string getReplacementName(Instruction *I, bool IsInput, unsigned o, 2139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned n = 0) { 2140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!I->hasName()) 2141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return ""; 2142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) + 2144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (n > 0 ? "." + utostr(n) : "")).str(); 2145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2147de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value that is to be used as the pointer input to the vector 2148de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instruction that fuses I with J. 
2149de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, 2150202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *I, Instruction *J, unsigned o) { 2151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *IPtr, *JPtr; 215265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; 2153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t OffsetInElmts; 2154282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2155202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel // Note: the analysis might fail here, that is why the pair order has 2156282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel // been precomputed (OffsetInElmts must be unused here). 2157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, 215865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace, JAddressSpace, 215993f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel OffsetInElmts, false); 2160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The pointer value is taken to be the one with the lowest offset. 
2162202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Value *VPtr = IPtr; 2163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 216464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType(); 216564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType(); 216664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 2167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VArgPtrType = PointerType::get(VArgType, 2168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cast<PointerType>(IPtr->getType())->getAddressSpace()); 2169de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o), 2170202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel /* insert before */ I); 2171de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J, 217464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaskOffset, unsigned NumInElem, 217564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElem1, unsigned IdxOffset, 217664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> &Mask) { 217764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements(); 217864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < NumElem1; ++v) { 2179de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int m = cast<ShuffleVectorInst>(J)->getMaskValue(v); 2180de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (m < 0) { 2181de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context)); 
2182de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned mm = m + (int) IdxOffset; 218464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (m >= (int) NumInElem1) 2185de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel mm += (int) NumInElem; 2186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[v+MaskOffset] = 2188de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantInt::get(Type::getInt32Ty(Context), mm); 2189de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2190de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2191de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2192de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value that is to be used as the vector-shuffle mask to the 2194de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vector instruction that fuses I with J. 2195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context, 2196de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 2197de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This is the shuffle mask. We need to append the second 2198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // mask to the first, and the numbers need to be adjusted. 
2199de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 220064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getType(); 220164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getType(); 220264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 220364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 220464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements(); 2205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2206de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Get the total number of elements in the fused vector type. 2207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // By definition, this must equal the number of elements in 2208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the final mask. 2209de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumElem = cast<VectorType>(VArgType)->getNumElements(); 2210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Constant*> Mask(NumElem); 2211de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 221264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *OpTypeI = I->getOperand(0)->getType(); 221364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements(); 221464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *OpTypeJ = J->getOperand(0)->getType(); 221564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements(); 221664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 221764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // The fused vector will be: 221864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // ----------------------------------------------------- 221964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ | 222064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal 
Finkel // ----------------------------------------------------- 222164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // from which we'll extract NumElem total elements (where the first NumElemI 222264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // of them come from the mask in I and the remainder come from the mask 222364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // in J. 2224de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2225de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For the mask from the first pair... 222664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI, 222764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 0, Mask); 2228de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2229de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For the mask from the second pair... 223064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ, 223164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NumInElemI, Mask); 2232de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2233de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return ConstantVector::get(Mask); 2234de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2235de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 223664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I, 223764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *J, unsigned o, Value *&LOp, 223864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemL, 223964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeL, Type *ArgTypeH, 224072465ea23d010507d3746adc126d719005981e05Hal Finkel bool IBeforeJ, unsigned IdxOff) { 224164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool ExpandedIEChain = false; 224264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) { 
224364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // If we have a pure insertelement chain, then this can be rewritten 224464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // into a chain that directly builds the larger type. 2245b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel if (isPureIEChain(LIE)) { 224664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel SmallVector<Value *, 8> VectElemts(numElemL, 224764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel UndefValue::get(ArgTypeL->getScalarType())); 224864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel InsertElementInst *LIENext = LIE; 224964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel do { 225064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned Idx = 225164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(LIENext->getOperand(2))->getSExtValue(); 225264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectElemts[Idx] = LIENext->getOperand(1); 225364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } while ((LIENext = 225464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel dyn_cast<InsertElementInst>(LIENext->getOperand(0)))); 225564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 225664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIENext = 0; 225764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *LIEPrev = UndefValue::get(ArgTypeH); 225864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemL; ++i) { 225964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (isa<UndefValue>(VectElemts[i])) continue; 226064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i], 226164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantInt::get(Type::getInt32Ty(Context), 226264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel i + IdxOff), 226372465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? 
I : J, 226472465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, i+1)); 226572465ea23d010507d3746adc126d719005981e05Hal Finkel LIENext->insertBefore(IBeforeJ ? J : I); 226664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIEPrev = LIENext; 226764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 226864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 226964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH); 227064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExpandedIEChain = true; 227164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 227264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 227364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 227464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return ExpandedIEChain; 227564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 227664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 2277de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value to be used as the specified operand of the vector 2278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instruction that fuses I with J. 
2279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I, 228072465ea23d010507d3746adc126d719005981e05Hal Finkel Instruction *J, unsigned o, bool IBeforeJ) { 2281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); 2282de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); 2283de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 228464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // Compute the fused vector type for this operand 228564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getOperand(o)->getType(); 228664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getOperand(o)->getType(); 228764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 2288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2289de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *L = I, *H = J; 229064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ; 2291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 229264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemL; 229364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgTypeL->isVectorTy()) 229464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL = cast<VectorType>(ArgTypeL)->getNumElements(); 229564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 229664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL = 1; 2297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 229864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemH; 229964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgTypeH->isVectorTy()) 230064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemH = cast<VectorType>(ArgTypeH)->getNumElements(); 
230164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 230264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemH = 1; 230364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 230464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *LOp = L->getOperand(o); 230564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *HOp = H->getOperand(o); 230664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = VArgType->getNumElements(); 230764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 230864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // First, we check if we can reuse the "original" vector outputs (if these 230964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // exist). We might need a shuffle. 231064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExtractElementInst *LEE = dyn_cast<ExtractElementInst>(LOp); 231164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExtractElementInst *HEE = dyn_cast<ExtractElementInst>(HOp); 231264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ShuffleVectorInst *LSV = dyn_cast<ShuffleVectorInst>(LOp); 231364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ShuffleVectorInst *HSV = dyn_cast<ShuffleVectorInst>(HOp); 231464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 231564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // FIXME: If we're fusing shuffle instructions, then we can't apply this 231664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // optimization. The input vectors to the shuffle might be a different 231764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // length from the shuffle outputs. Unfortunately, the replacement 231864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // shuffle mask has already been formed, and the mask entries are sensitive 231964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // to the sizes of the inputs. 
232064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool IsSizeChangeShuffle = 232164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel isa<ShuffleVectorInst>(L) && 232264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel (LOp->getType() != L->getType() || HOp->getType() != H->getType()); 232364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 232464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) { 232564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We can have at most two unique vector inputs. 232664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool CanUseInputs = true; 232764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I1, *I2 = 0; 232864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (LEE) { 232964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = LEE->getOperand(0); 233064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 233164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = LSV->getOperand(0); 233264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = LSV->getOperand(1); 233364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I2 == I1 || isa<UndefValue>(I2)) 233464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = 0; 233564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 233664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 233764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (HEE) { 233864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I3 = HEE->getOperand(0); 233964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I3 != I1) 234064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I3; 234164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I3 != I1 && I3 != I2) 234264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 234364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 234464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I3 = HSV->getOperand(0); 
234564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I3 != I1) 234664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I3; 234764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I3 != I1 && I3 != I2) 234864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 234964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 235064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (CanUseInputs) { 235164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I4 = HSV->getOperand(1); 235264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!isa<UndefValue>(I4)) { 235364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I4 != I1) 235464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I4; 235564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I4 != I1 && I4 != I2) 235664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 235764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 235864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 235964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 236064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 236164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (CanUseInputs) { 236264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned LOpElem = 236364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType()) 236464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ->getNumElements(); 236564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned HOpElem = 236664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType()) 236764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ->getNumElements(); 236864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 236964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We have one or two input vectors. 
We need to map each index of the 237064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // operands to the index of the original vector. 237164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel SmallVector<std::pair<int, int>, 8> II(numElem); 237264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemL; ++i) { 237364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx, INum; 237464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (LEE) { 237564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = 237664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(LEE->getOperand(1))->getSExtValue(); 237764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LEE->getOperand(0) == I1 ? 0 : 1; 237864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 237964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = LSV->getMaskValue(i); 238064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx < (int) LOpElem) { 238164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LSV->getOperand(0) == I1 ? 0 : 1; 238264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 238364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx -= LOpElem; 238464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LSV->getOperand(1) == I1 ? 
0 : 1; 238564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 238664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 238764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 238864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel II[i] = std::pair<int, int>(Idx, INum); 238964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 239064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemH; ++i) { 239164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx, INum; 239264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (HEE) { 239364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = 239464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(HEE->getOperand(1))->getSExtValue(); 239564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HEE->getOperand(0) == I1 ? 0 : 1; 239664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 239764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = HSV->getMaskValue(i); 239864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx < (int) HOpElem) { 239964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HSV->getOperand(0) == I1 ? 0 : 1; 240064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 240164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx -= HOpElem; 240264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HSV->getOperand(1) == I1 ? 0 : 1; 240364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 240464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 240564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 240664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel II[i + numElemL] = std::pair<int, int>(Idx, INum); 240764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 240864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 240964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We now have an array which tells us from which index of which 241064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // input vector each element of the operand comes. 
241164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *I1T = cast<VectorType>(I1->getType()); 241264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned I1Elem = I1T->getNumElements(); 241364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 241464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2) { 241564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // In this case there is only one underlying vector input. Check for 241664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // the trivial case where we can use the input directly. 241764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I1Elem == numElem) { 241864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool ElemInOrder = true; 241964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElem; ++i) { 242064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (II[i].first != (int) i && II[i].first != -1) { 242164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ElemInOrder = false; 242264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel break; 242364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 242464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 242564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 242664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ElemInOrder) 242764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return I1; 242864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 242964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 243064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // A shuffle is needed. 
243164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElem); 243264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElem; ++i) { 243364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx = II[i].first; 243464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx == -1) 243564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[i] = UndefValue::get(Type::getInt32Ty(Context)); 243664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 243764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 243864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 243964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 244064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = 244164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, UndefValue::get(I1T), 244264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 244372465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 244472465ea23d010507d3746adc126d719005981e05Hal Finkel true, o)); 244572465ea23d010507d3746adc126d719005981e05Hal Finkel S->insertBefore(IBeforeJ ? J : I); 244664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 244764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 244864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 244964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *I2T = cast<VectorType>(I2->getType()); 245064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned I2Elem = I2T->getNumElements(); 245164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 245264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // This input comes from two distinct vectors. The first step is to 245364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // make sure that both vectors are the same length. 
If not, the 245464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // smaller one will need to grow before they can be shuffled together. 245564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I1Elem < I2Elem) { 245664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(I2Elem); 245764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 245864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I1Elem; ++v) 245964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 246064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I2Elem; ++v) 246164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 246264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 246364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewI1 = 246464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, UndefValue::get(I1T), 246564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 246672465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 246772465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 246872465ea23d010507d3746adc126d719005981e05Hal Finkel NewI1->insertBefore(IBeforeJ ? 
J : I); 246964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = NewI1; 247064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1T = I2T; 247164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1Elem = I2Elem; 247264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (I1Elem > I2Elem) { 247364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(I1Elem); 247464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 247564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I2Elem; ++v) 247664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 247764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I1Elem; ++v) 247864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 247964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 248064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewI2 = 248164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I2, UndefValue::get(I2T), 248264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 248372465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 248472465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 248572465ea23d010507d3746adc126d719005981e05Hal Finkel NewI2->insertBefore(IBeforeJ ? J : I); 248664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = NewI2; 248764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2T = I1T; 248864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2Elem = I1Elem; 248964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 249064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 249164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // Now that both I1 and I2 are the same length we can shuffle them 249264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // together (and use the result). 
249364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElem); 249464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElem; ++v) { 249564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (II[v].first == -1) { 249664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 249764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 249864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx = II[v].first + II[v].second * I1Elem; 249964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 250064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 250164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 250264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 250364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewOp = 250464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask), 250572465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, true, o)); 250672465ea23d010507d3746adc126d719005981e05Hal Finkel NewOp->insertBefore(IBeforeJ ? 
J : I); 250764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return NewOp; 250864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2509de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 251164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgType = ArgTypeL; 251264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL < numElemH) { 251364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH, 251472465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeL, VArgType, IBeforeJ, 1)) { 251564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // This is another short-circuit case: we're combining a scalar into 251664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // a vector that is formed by an IE chain. We've just expanded the IE 251764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // chain, now insert the scalar and we're done. 251864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 251964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = InsertElementInst::Create(HOp, LOp, CV0, 252072465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, true, o)); 252172465ea23d010507d3746adc126d719005981e05Hal Finkel S->insertBefore(IBeforeJ ? J : I); 252264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 252364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL, 252472465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeH, IBeforeJ)) { 252564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // The two vector inputs to the shuffle must be the same length, 252664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // so extend the smaller vector to be the same length as the larger one. 
252764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NLOp; 252864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL > 1) { 252964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 253064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElemH); 253164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 253264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemL; ++v) 253364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 253464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemH; ++v) 253564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 253664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 253764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL), 253864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 253972465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 254072465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 254164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 254264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0, 254372465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 254472465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 254564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 254664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 254772465ea23d010507d3746adc126d719005981e05Hal Finkel NLOp->insertBefore(IBeforeJ ? 
J : I); 254864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LOp = NLOp; 254964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 255064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 255164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ArgType = ArgTypeH; 255264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (numElemL > numElemH) { 255364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL, 255472465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeH, VArgType, IBeforeJ)) { 255564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = 255664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel InsertElementInst::Create(LOp, HOp, 255764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantInt::get(Type::getInt32Ty(Context), 255864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL), 255972465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 256072465ea23d010507d3746adc126d719005981e05Hal Finkel true, o)); 256172465ea23d010507d3746adc126d719005981e05Hal Finkel S->insertBefore(IBeforeJ ? 
J : I); 256264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 256364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH, 256472465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeL, IBeforeJ)) { 256564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NHOp; 256664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemH > 1) { 256764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElemL); 256864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 256964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemH; ++v) 257064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 257164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemL; ++v) 257264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 257364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 257464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH), 257564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 257672465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 257772465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 257864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 257964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0, 258072465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 258172465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 258264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 258364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 258472465ea23d010507d3746adc126d719005981e05Hal Finkel NHOp->insertBefore(IBeforeJ ? 
J : I); 258564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel HOp = NHOp; 2586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 258764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2588de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 258964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgType->isVectorTy()) { 259064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = cast<VectorType>(VArgType)->getNumElements(); 259164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask(numElem); 259264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElem; ++v) { 259364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned Idx = v; 259464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // If the low vector was expanded, we need to skip the extra 259564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // undefined entries. 259664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (v >= numElemL && numElemH > numElemL) 259764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx += (numElemH - numElemL); 259864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 259964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2600de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 260164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *BV = new ShuffleVectorInst(LOp, HOp, 260272465ea23d010507d3746adc126d719005981e05Hal Finkel ConstantVector::get(Mask), 260372465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, true, o)); 260472465ea23d010507d3746adc126d719005981e05Hal Finkel BV->insertBefore(IBeforeJ ? 
J : I); 2605de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV; 2606de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2607de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2608de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *BV1 = InsertElementInst::Create( 260964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel UndefValue::get(VArgType), LOp, CV0, 261072465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 261172465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 261272465ea23d010507d3746adc126d719005981e05Hal Finkel BV1->insertBefore(IBeforeJ ? J : I); 261364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1, 261472465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 261572465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 2)); 261672465ea23d010507d3746adc126d719005981e05Hal Finkel BV2->insertBefore(IBeforeJ ? J : I); 2617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV2; 2618de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2619de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2620de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function creates an array of values that will be used as the inputs 2621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to the vector instruction that fuses I with J. 
2622de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, 2623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J, 262472465ea23d010507d3746adc126d719005981e05Hal Finkel SmallVector<Value *, 3> &ReplacedOperands, 262572465ea23d010507d3746adc126d719005981e05Hal Finkel bool IBeforeJ) { 2626de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumOperands = I->getNumOperands(); 2627de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2628de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { 2629de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate backward so that we look at the store pointer 2630de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // first and know whether or not we need to flip the inputs. 2631de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2632de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) { 2633de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This is the pointer for a load/store instruction. 
2634202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o); 2635de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 26366173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } else if (isa<CallInst>(I)) { 2637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Function *F = cast<CallInst>(I)->getCalledFunction(); 2638a77728415857196035c0090f7b2749d7971811a2Hal Finkel Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID(); 26396173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel if (o == NumOperands-1) { 26406173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel BasicBlock &BB = *I->getParent(); 2641bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 26426173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Module *M = BB.getParent()->getParent(); 264364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getType(); 264464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getType(); 264564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 2646bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 2647a77728415857196035c0090f7b2749d7971811a2Hal Finkel ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType); 26486173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel continue; 26496173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } else if (IID == Intrinsic::powi && o == 1) { 26506173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // The second argument of powi is a single integer and we've already 26516173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // checked that both arguments are equal. As a result, we just keep 26526173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // I's second argument. 
26536173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel ReplacedOperands[o] = I->getOperand(o); 26546173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel continue; 26556173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } 2656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) { 2657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J); 2658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 266172465ea23d010507d3746adc126d719005981e05Hal Finkel ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ); 2662de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function creates two values that represent the outputs of the 2666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // original I and J instructions. These are generally vector shuffles 2667de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // or extracts. In many cases, these will end up being unused and, thus, 2668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // eliminated by later passes. 
2669de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I, 2670de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, Instruction *K, 2671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 2672202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *&K1, Instruction *&K2) { 2673de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<StoreInst>(I)) { 2674de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(I, K); 2675de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(J, K); 2676de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2677de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *IType = I->getType(); 267864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *JType = J->getType(); 267964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 268064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *VType = getVecTypeForPair(IType, JType); 268164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = VType->getNumElements(); 268264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 268364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemI, numElemJ; 268464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (IType->isVectorTy()) 268564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemI = cast<VectorType>(IType)->getNumElements(); 268664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 268764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemI = 1; 268864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 268964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (JType->isVectorTy()) 269064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemJ = cast<VectorType>(JType)->getNumElements(); 269164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 269264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemJ = 1; 
2693de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2694de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IType->isVectorTy()) { 269564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask1(numElemI), Mask2(numElemI); 269664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElemI; ++v) { 269764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 269864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v); 269964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2700de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 270164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel K1 = new ShuffleVectorInst(K, UndefValue::get(VType), 2702202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel ConstantVector::get( Mask1), 270364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(K, false, 1)); 2704de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 270564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); 2706202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel K1 = ExtractElementInst::Create(K, CV0, 2707de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 1)); 270864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 270964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 271064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (JType->isVectorTy()) { 271164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ); 271264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElemJ; ++v) { 271364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 271464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v); 
271564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 271664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 271764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel K2 = new ShuffleVectorInst(K, UndefValue::get(VType), 2718202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel ConstantVector::get( Mask2), 271964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(K, false, 2)); 272064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 272164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); 2722202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel K2 = ExtractElementInst::Create(K, CV1, 2723de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 2)); 2724de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2725de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2726de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K1->insertAfter(K); 2727de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K2->insertAfter(K1); 2728de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InsertionPt = K2; 2729de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2730de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2731de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2732de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move all uses of the function I (including pairing-induced uses) after J. 2733de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB, 27342f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 2735de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 2736de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 
2737ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2738de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2739de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2740de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2741de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (; cast<Instruction>(L) != J; ++L) 27422f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs); 2743de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2744de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel assert(cast<Instruction>(L) == J && 2745de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel "Tracking has not proceeded far enough to check for dependencies"); 2746de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If J is now in the use set of I, then trackUsesOfI will return true 2747de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // and we have a dependency cycle (and the fusing operation must abort). 27482f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSetPairs); 2749de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2750de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2751de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move all uses of the function I (including pairing-induced uses) after J. 2752de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB, 27532f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 2754de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 2755de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 2756de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 
2757ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2758de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2759de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2760de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2761de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (; cast<Instruction>(L) != J;) { 27622f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) { 2763de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move this instruction 2764de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *InstToMove = L; ++L; 2765de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2766de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: moving: " << *InstToMove << 2767de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " to after " << *InsertionPt << "\n"); 2768de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InstToMove->removeFromParent(); 2769de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InstToMove->insertAfter(InsertionPt); 2770de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InsertionPt = InstToMove; 2771de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2772de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++L; 2773de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2774de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2775de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2776de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2777de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Collect all load instruction that are in the move set of a given first 2778de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair member. These loads depend on the first instruction, I, and so need 2779de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to be moved after J (the second instruction) when the pair is fused. 
2780de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB, 2781de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 2782de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 27832f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 2784de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I) { 2785de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 2786ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2787de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2788de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2789de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2790de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2791de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: We cannot end the loop when we reach J because J could be moved 2792de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // farther down the use chain by another instruction pairing. Also, J 2793de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // could be before I if this is an inverted input. 
2794de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) { 2795de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (trackUsesOfI(Users, WriteSet, I, L)) { 27962f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel if (L->mayReadFromMemory()) { 2797de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LoadMoveSet.insert(ValuePair(L, I)); 27982f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel LoadMoveSetPairs.insert(ValuePair(L, I)); 27992f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel } 2800de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2801de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2802de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2803de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2804de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // In cases where both load/stores and the computation of their pointers 2805de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // are chosen for vectorization, we can end up in a situation where the 2806de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // aliasing analysis starts returning different query results as the 2807de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // process of fusing instruction pairs continues. Because the algorithm 2808de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // relies on finding the same use trees here as were found earlier, we'll 2809de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // need to precompute the necessary aliasing information here and then 2810de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // manually update it during the fusion process. 
2811de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::collectLoadMoveSet(BasicBlock &BB, 2812de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 2813de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 28142f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel std::multimap<Value *, Value *> &LoadMoveSet, 28152f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs) { 2816de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator PI = PairableInsts.begin(), 2817de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PIE = PairableInsts.end(); PI != PIE; ++PI) { 2818de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI); 2819de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (P == ChosenPairs.end()) continue; 2820de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2821de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I = cast<Instruction>(P->first); 28222f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, 28232f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel LoadMoveSetPairs, I); 2824de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2825de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2826de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2827ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // When the first instruction in each pair is cloned, it will inherit its 2828ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // parent's metadata. This metadata must be combined with that of the other 2829ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // instruction in a safe way. 
2830ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel void BBVectorize::combineMetadata(Instruction *K, const Instruction *J) { 2831ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata; 2832ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->getAllMetadataOtherThanDebugLoc(Metadata); 2833ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel for (unsigned i = 0, n = Metadata.size(); i < n; ++i) { 2834ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel unsigned Kind = Metadata[i].first; 2835ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel MDNode *JMD = J->getMetadata(Kind); 2836ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel MDNode *KMD = Metadata[i].second; 2837ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 2838ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel switch (Kind) { 2839ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel default: 2840ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, 0); // Remove unknown metadata 2841ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2842ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel case LLVMContext::MD_tbaa: 2843ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); 2844ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2845ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel case LLVMContext::MD_fpmath: 2846ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD)); 2847ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2848ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2849ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2850ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2851ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 2852de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function fuses the chosen instruction pairs into vector instructions, 
2853de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // taking care preserve any needed scalar outputs and, then, it reorders the 2854de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // remaining instructions as needed (users of the first member of the pair 2855de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // need to be moved to after the location of the second member of the pair 2856de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // because the vector instruction is inserted in the location of the pair's 2857de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // second member). 2858de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::fuseChosenPairs(BasicBlock &BB, 2859de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 2860a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 286172465ea23d010507d3746adc126d719005981e05Hal Finkel DenseSet<ValuePair> &FixedOrderPairs, 286272465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 286372465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairs, 286472465ea23d010507d3746adc126d719005981e05Hal Finkel std::multimap<ValuePair, ValuePair> &ConnectedPairDeps) { 2865de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LLVMContext& Context = BB.getContext(); 2866de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2867de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // During the vectorization process, the order of the pairs to be fused 2868de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // could be flipped. So we'll add each pair, flipped, into the ChosenPairs 2869de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // list. After a pair is fused, the flipped pair is removed from the list. 
287072465ea23d010507d3746adc126d719005981e05Hal Finkel DenseSet<ValuePair> FlippedPairs; 2871de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(), 2872de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = ChosenPairs.end(); P != E; ++P) 287372465ea23d010507d3746adc126d719005981e05Hal Finkel FlippedPairs.insert(ValuePair(P->second, P->first)); 287472465ea23d010507d3746adc126d719005981e05Hal Finkel for (DenseSet<ValuePair>::iterator P = FlippedPairs.begin(), 2875de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = FlippedPairs.end(); P != E; ++P) 2876de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.insert(*P); 2877de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2878de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::multimap<Value *, Value *> LoadMoveSet; 28792f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> LoadMoveSetPairs; 28802f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel collectLoadMoveSet(BB, PairableInsts, ChosenPairs, 28812f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel LoadMoveSet, LoadMoveSetPairs); 2882de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2883de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); 2884de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2885de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { 2886de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(PI); 2887de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (P == ChosenPairs.end()) { 2888de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2889de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2890de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2891de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2892de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if 
(getDepthFactor(P->first) == 0) { 2893de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // These instructions are not really fused, but are tracked as though 2894de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // they are. Any case in which it would be interesting to fuse them 2895de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // will be taken care of by InstCombine. 2896de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel --NumFusedOps; 2897de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2898de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2899de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2900de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2901de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I = cast<Instruction>(P->first), 2902de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel *J = cast<Instruction>(P->second); 2903de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2904de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: fusing: " << *I << 2905de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << "\n"); 2906de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2907de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Remove the pair and flipped pair from the list. 
2908de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator FP = ChosenPairs.find(P->second); 2909de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel assert(FP != ChosenPairs.end() && "Flipped pair not found in list"); 2910de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.erase(FP); 2911de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.erase(P); 2912de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 29132f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) { 2914de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: fusion of: " << *I << 2915de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << 2916de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " aborted because of non-trivial dependency cycle\n"); 2917de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel --NumFusedOps; 2918de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2919de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2920de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2921de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2922a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel // If the pair must have the other order, then flip it. 2923a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I)); 292472465ea23d010507d3746adc126d719005981e05Hal Finkel if (!FlipPairOrder && !FixedOrderPairs.count(ValuePair(I, J))) { 292572465ea23d010507d3746adc126d719005981e05Hal Finkel // This pair does not have a fixed order, and so we might want to 292672465ea23d010507d3746adc126d719005981e05Hal Finkel // flip it if that will yield fewer shuffles. We count the number 292772465ea23d010507d3746adc126d719005981e05Hal Finkel // of dependencies connected via swaps, and those directly connected, 292872465ea23d010507d3746adc126d719005981e05Hal Finkel // and flip the order if the number of swaps is greater. 
292972465ea23d010507d3746adc126d719005981e05Hal Finkel bool OrigOrder = true; 293072465ea23d010507d3746adc126d719005981e05Hal Finkel VPPIteratorPair IP = ConnectedPairDeps.equal_range(ValuePair(I, J)); 293172465ea23d010507d3746adc126d719005981e05Hal Finkel if (IP.first == ConnectedPairDeps.end()) { 293272465ea23d010507d3746adc126d719005981e05Hal Finkel IP = ConnectedPairDeps.equal_range(ValuePair(J, I)); 293372465ea23d010507d3746adc126d719005981e05Hal Finkel OrigOrder = false; 293472465ea23d010507d3746adc126d719005981e05Hal Finkel } 293572465ea23d010507d3746adc126d719005981e05Hal Finkel 293672465ea23d010507d3746adc126d719005981e05Hal Finkel if (IP.first != ConnectedPairDeps.end()) { 293772465ea23d010507d3746adc126d719005981e05Hal Finkel unsigned NumDepsDirect = 0, NumDepsSwap = 0; 293872465ea23d010507d3746adc126d719005981e05Hal Finkel for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; 293972465ea23d010507d3746adc126d719005981e05Hal Finkel Q != IP.second; ++Q) { 294072465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned>::iterator R = 294172465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.find(VPPair(Q->second, Q->first)); 294272465ea23d010507d3746adc126d719005981e05Hal Finkel assert(R != PairConnectionTypes.end() && 294372465ea23d010507d3746adc126d719005981e05Hal Finkel "Cannot find pair connection type"); 294472465ea23d010507d3746adc126d719005981e05Hal Finkel if (R->second == PairConnectionDirect) 294572465ea23d010507d3746adc126d719005981e05Hal Finkel ++NumDepsDirect; 294672465ea23d010507d3746adc126d719005981e05Hal Finkel else if (R->second == PairConnectionSwap) 294772465ea23d010507d3746adc126d719005981e05Hal Finkel ++NumDepsSwap; 294872465ea23d010507d3746adc126d719005981e05Hal Finkel } 294972465ea23d010507d3746adc126d719005981e05Hal Finkel 295072465ea23d010507d3746adc126d719005981e05Hal Finkel if (!OrigOrder) 295172465ea23d010507d3746adc126d719005981e05Hal Finkel std::swap(NumDepsDirect, NumDepsSwap); 
295272465ea23d010507d3746adc126d719005981e05Hal Finkel 295372465ea23d010507d3746adc126d719005981e05Hal Finkel if (NumDepsSwap > NumDepsDirect) { 295472465ea23d010507d3746adc126d719005981e05Hal Finkel FlipPairOrder = true; 295572465ea23d010507d3746adc126d719005981e05Hal Finkel DEBUG(dbgs() << "BBV: reordering pair: " << *I << 295672465ea23d010507d3746adc126d719005981e05Hal Finkel " <-> " << *J << "\n"); 295772465ea23d010507d3746adc126d719005981e05Hal Finkel } 295872465ea23d010507d3746adc126d719005981e05Hal Finkel } 295972465ea23d010507d3746adc126d719005981e05Hal Finkel } 2960282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2961202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *L = I, *H = J; 2962a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel if (FlipPairOrder) 2963202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel std::swap(H, L); 2964202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel 296572465ea23d010507d3746adc126d719005981e05Hal Finkel // If the pair being fused uses the opposite order from that in the pair 296672465ea23d010507d3746adc126d719005981e05Hal Finkel // connection map, then we need to flip the types. 
296772465ea23d010507d3746adc126d719005981e05Hal Finkel VPPIteratorPair IP = ConnectedPairs.equal_range(ValuePair(H, L)); 296872465ea23d010507d3746adc126d719005981e05Hal Finkel for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; 296972465ea23d010507d3746adc126d719005981e05Hal Finkel Q != IP.second; ++Q) { 297072465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(*Q); 297172465ea23d010507d3746adc126d719005981e05Hal Finkel assert(R != PairConnectionTypes.end() && 297272465ea23d010507d3746adc126d719005981e05Hal Finkel "Cannot find pair connection type"); 297372465ea23d010507d3746adc126d719005981e05Hal Finkel if (R->second == PairConnectionDirect) 297472465ea23d010507d3746adc126d719005981e05Hal Finkel R->second = PairConnectionSwap; 297572465ea23d010507d3746adc126d719005981e05Hal Finkel else if (R->second == PairConnectionSwap) 297672465ea23d010507d3746adc126d719005981e05Hal Finkel R->second = PairConnectionDirect; 297772465ea23d010507d3746adc126d719005981e05Hal Finkel } 297872465ea23d010507d3746adc126d719005981e05Hal Finkel 297972465ea23d010507d3746adc126d719005981e05Hal Finkel bool LBeforeH = !FlipPairOrder; 2980de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumOperands = I->getNumOperands(); 2981de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SmallVector<Value *, 3> ReplacedOperands(NumOperands); 298272465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementInputsForPair(Context, L, H, ReplacedOperands, 298372465ea23d010507d3746adc126d719005981e05Hal Finkel LBeforeH); 2984de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2985de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Make a copy of the original operation, change its type to the vector 2986de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // type and replace its operands with the vector operands. 
298772465ea23d010507d3746adc126d719005981e05Hal Finkel Instruction *K = L->clone(); 298872465ea23d010507d3746adc126d719005981e05Hal Finkel if (L->hasName()) 298972465ea23d010507d3746adc126d719005981e05Hal Finkel K->takeName(L); 299072465ea23d010507d3746adc126d719005981e05Hal Finkel else if (H->hasName()) 299172465ea23d010507d3746adc126d719005981e05Hal Finkel K->takeName(H); 2992de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2993de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isa<StoreInst>(K)) 2994202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel K->mutateType(getVecTypeForPair(L->getType(), H->getType())); 2995de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 299672465ea23d010507d3746adc126d719005981e05Hal Finkel combineMetadata(K, H); 2997430b9079c614cd3f45015a6516590d33742cc802Hal Finkel K->intersectOptionalDataWith(H); 2998ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 2999de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned o = 0; o < NumOperands; ++o) 3000de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->setOperand(o, ReplacedOperands[o]); 3001de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3002de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->insertAfter(J); 3003de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3004de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Instruction insertion point: 3005de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *InsertionPt = K; 3006de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *K1 = 0, *K2 = 0; 3007202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2); 3008de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3009de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The use tree of the first original instruction must be moved to after 3010de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the location of the second instruction. 
The entire use tree of the 3011de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // first instruction is disjoint from the input tree of the second 3012de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // (by definition), and so commutes with it. 3013de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 30142f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J); 3015de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3016de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isa<StoreInst>(I)) { 301772465ea23d010507d3746adc126d719005981e05Hal Finkel L->replaceAllUsesWith(K1); 301872465ea23d010507d3746adc126d719005981e05Hal Finkel H->replaceAllUsesWith(K2); 301972465ea23d010507d3746adc126d719005981e05Hal Finkel AA->replaceWithNewValue(L, K1); 302072465ea23d010507d3746adc126d719005981e05Hal Finkel AA->replaceWithNewValue(H, K2); 3021de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 3022de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3023de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Instructions that may read from memory may be in the load move set. 3024de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Once an instruction is fused, we no longer need its move set, and so 3025de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the values of the map never need to be updated. However, when a load 3026de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is fused, we need to merge the entries from both instructions in the 3027de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair in case those instructions were in the move set of some other 3028de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // yet-to-be-fused pair. The loads in question are the keys of the map. 
3029de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I->mayReadFromMemory()) { 3030de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<ValuePair> NewSetMembers; 3031de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair IPairRange = LoadMoveSet.equal_range(I); 3032de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel VPIteratorPair JPairRange = LoadMoveSet.equal_range(J); 3033de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator N = IPairRange.first; 3034de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel N != IPairRange.second; ++N) 3035de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel NewSetMembers.push_back(ValuePair(K, N->second)); 3036de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::multimap<Value *, Value *>::iterator N = JPairRange.first; 3037de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel N != JPairRange.second; ++N) 3038de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel NewSetMembers.push_back(ValuePair(K, N->second)); 3039de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(), 30402f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel AE = NewSetMembers.end(); A != AE; ++A) { 3041de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LoadMoveSet.insert(*A); 30422f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel LoadMoveSetPairs.insert(*A); 30432f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel } 3044de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 3045de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3046de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Before removing I, set the iterator to the next instruction. 
3047de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PI = llvm::next(BasicBlock::iterator(I)); 3048de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (cast<Instruction>(PI) == J) 3049de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 3050de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3051de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE->forgetValue(I); 3052de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE->forgetValue(J); 3053de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel I->eraseFromParent(); 3054de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel J->eraseFromParent(); 305572465ea23d010507d3746adc126d719005981e05Hal Finkel 305672465ea23d010507d3746adc126d719005981e05Hal Finkel DEBUG(if (PrintAfterEveryPair) dbgs() << "BBV: block is now: \n" << 305772465ea23d010507d3746adc126d719005981e05Hal Finkel BB << "\n"); 3058de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 3059de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3060de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: final: \n" << BB << "\n"); 3061de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 3062de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel} 3063de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3064de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelchar BBVectorize::ID = 0; 3065de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic const char bb_vectorize_name[] = "Basic-Block Vectorization"; 3066de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) 3067de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 30688bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler CarruthINITIALIZE_AG_DEPENDENCY(TargetTransformInfo) 3069e29c19091cca58db668407dfc5dd86c70e8b3d49Hal FinkelINITIALIZE_PASS_DEPENDENCY(DominatorTree) 3070de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 
3071de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) 3072de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3073bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengBasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) { 3074bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng return new BBVectorize(C); 3075de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel} 3076de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3077bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengbool 3078bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengllvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) { 3079bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize BBVectorizer(P, C); 308087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng return BBVectorizer.vectorizeBB(BB); 308187825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng} 3082bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 3083bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng//===----------------------------------------------------------------------===// 3084bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengVectorizeConfig::VectorizeConfig() { 3085bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng VectorBits = ::VectorBits; 3086768edf3cd037aab10391abc279f71470df8e3156Hal Finkel VectorizeBools = !::NoBools; 308786312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeInts = !::NoInts; 308886312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeFloats = !::NoFloats; 3089f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel VectorizePointers = !::NoPointers; 309086312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeCasts = !::NoCasts; 309186312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeMath = !::NoMath; 309286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeFMA = !::NoFMA; 3093fc3665c87519850f629c9565535e3be447e10addHal Finkel 
VectorizeSelect = !::NoSelect; 3094e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel VectorizeCmp = !::NoCmp; 3095f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel VectorizeGEP = !::NoGEP; 309686312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeMemOps = !::NoMemOps; 3097bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng AlignedOnly = ::AlignedOnly; 3098bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng ReqChainDepth= ::ReqChainDepth; 3099bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng SearchLimit = ::SearchLimit; 3100bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck; 3101bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng SplatBreaksChain = ::SplatBreaksChain; 3102bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxInsts = ::MaxInsts; 3103bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxIter = ::MaxIter; 310464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Pow2LenOnly = ::Pow2LenOnly; 3105bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng NoMemOpBoost = ::NoMemOpBoost; 3106bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng FastDep = ::FastDep; 3107bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng} 3108