BBVectorize.cpp revision 8f3359a4b396d3f1a7b2726e02f199be74c62e4c
1de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
2de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//
3de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//                     The LLVM Compiler Infrastructure
4de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//
5de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// This file is distributed under the University of Illinois Open Source
6de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// License. See LICENSE.TXT for details.
7de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//
8de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//===----------------------------------------------------------------------===//
9de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//
10de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// This file implements a basic-block vectorization pass. The algorithm was
11de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// inspired by that used by the Vienna MAP Vectorizer by Franchetti and Kral,
12de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// et al. It works by looking for chains of pairable operations and then
13de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// pairing them.
14de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//
15de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//===----------------------------------------------------------------------===//
16de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
17de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define BBV_NAME "bb-vectorize"
18de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define DEBUG_TYPE BBV_NAME
19d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Transforms/Vectorize.h"
20de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseMap.h"
21de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseSet.h"
22d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/ADT/STLExtras.h"
2386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel#include "llvm/ADT/SmallSet.h"
24de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/SmallVector.h"
25de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/Statistic.h"
26de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/StringExtras.h"
27de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasAnalysis.h"
28de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasSetTracker.h"
29e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel#include "llvm/Analysis/Dominators.h"
30de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolution.h"
31de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolutionExpressions.h"
32be04929f7fd76a921540e9901f24563e51dc1219Chandler Carruth#include "llvm/Analysis/TargetTransformInfo.h"
33de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ValueTracking.h"
340b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Constants.h"
350b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DataLayout.h"
360b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DerivedTypes.h"
370b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Function.h"
380b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Instructions.h"
390b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IntrinsicInst.h"
400b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Intrinsics.h"
410b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/LLVMContext.h"
420b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Metadata.h"
430b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Type.h"
44d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Pass.h"
45de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/CommandLine.h"
46de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/Debug.h"
47de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/ValueHandle.h"
48d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/raw_ostream.h"
4964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel#include "llvm/Transforms/Utils/Local.h"
50de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include <algorithm>
51de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include <map>
52de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelusing namespace llvm;
53de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Hidden command-line options governing target information and the chain-depth
// requirement. Defaults as written below: target information is not ignored,
// the required chain depth is 6, and the chain-depth requirement is not used
// together with target information.
5465309660fa61a837cc05323f69c618a7d8134d56Hal Finkelstatic cl::opt<bool>
5565309660fa61a837cc05323f69c618a7d8134d56Hal FinkelIgnoreTargetInfo("bb-vectorize-ignore-target-info",  cl::init(false),
5665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel  cl::Hidden, cl::desc("Ignore target information"));
5765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel
58de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned>
59de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
60de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("The required chain depth for vectorization"));
61de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
6278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkelstatic cl::opt<bool>
6378fd353d5e5daedc47ecc31b6193ca48793c249cHal FinkelUseChainDepthWithTI("bb-vectorize-use-chain-depth",  cl::init(false),
6478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel  cl::Hidden, cl::desc("Use the chain depth requirement with"
6578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                       " target information"));
6678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
// Hidden tuning options that bound the pairing search and the vector shapes
// considered. Defaults as written below: search distance 400, native vector
// width 128 bits, at most 500 pairable instructions per group, and at most 200
// candidate pairs for the full cycle check.
// NOTE(review): MaxIter defaults to 0; presumably 0 means "no limit" -- confirm
// at the place where MaxIter is read (not visible in this chunk).
67de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned>
68de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden,
69de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("The maximum search distance for instruction pairs"));
70de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
71de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
72de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden,
73de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("Replicating one element to a pair breaks the chain"));
74de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
75de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned>
76de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelVectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden,
77de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("The size of the native vector registers"));
78de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
79de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned>
80de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden,
81de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("The maximum number of pairing iterations"));
82de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
8364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkelstatic cl::opt<bool>
8464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal FinkelPow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden,
8564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel  cl::desc("Don't try to form non-2^n-length vectors"));
8664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
87de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned>
885d4e18bc39fea892f523d960213906d296d3cb38Hal FinkelMaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden,
895d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel  cl::desc("The maximum number of pairable instructions per group"));
905d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
915d4e18bc39fea892f523d960213906d296d3cb38Hal Finkelstatic cl::opt<unsigned>
92de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200),
93de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"
94de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       " a full cycle check"));
95de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Hidden boolean switches, each of which excludes one category of value or
// operation from vectorization (see the individual cl::desc strings). Every
// switch defaults to false except NoPointers, which defaults to true; the
// FIXME next to it explains why.
96de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
97768edf3cd037aab10391abc279f71470df8e3156Hal FinkelNoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden,
98768edf3cd037aab10391abc279f71470df8e3156Hal Finkel  cl::desc("Don't try to vectorize boolean (i1) values"));
99768edf3cd037aab10391abc279f71470df8e3156Hal Finkel
100768edf3cd037aab10391abc279f71470df8e3156Hal Finkelstatic cl::opt<bool>
101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden,
102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("Don't try to vectorize integer values"));
103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("Don't try to vectorize floating-point values"));
107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
108822ab00847da841a63be4e3883cb5f442dc69069Hal Finkel// FIXME: This should default to false once pointer vector support works.
109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
110822ab00847da841a63be4e3883cb5f442dc69069Hal FinkelNoPointers("bb-vectorize-no-pointers", cl::init(/*false*/ true), cl::Hidden,
111f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel  cl::desc("Don't try to vectorize pointer values"));
112f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel
113f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkelstatic cl::opt<bool>
114de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden,
115de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("Don't try to vectorize casting (conversion) operations"));
116de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
118de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden,
119de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("Don't try to vectorize floating-point math intrinsics"));
120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
126fc3665c87519850f629c9565535e3be447e10addHal FinkelNoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden,
127fc3665c87519850f629c9565535e3be447e10addHal Finkel  cl::desc("Don't try to vectorize select instructions"));
128fc3665c87519850f629c9565535e3be447e10addHal Finkel
129fc3665c87519850f629c9565535e3be447e10addHal Finkelstatic cl::opt<bool>
130e415f96b6a43ac8861148a11a4258bc38c247e8fHal FinkelNoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden,
131e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel  cl::desc("Don't try to vectorize comparison instructions"));
132e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel
133e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkelstatic cl::opt<bool>
134f3f5a1e6f77a842ccb24cc81766437da5197d712Hal FinkelNoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden,
135f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel  cl::desc("Don't try to vectorize getelementptr instructions"));
136f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel
// Hidden boolean options concerning loads/stores and the dependency analysis;
// all four default to false. Their meaning is given by the cl::desc strings.
137f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkelstatic cl::opt<bool>
138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden,
139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("Don't try to vectorize loads and stores"));
140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelAlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden,
143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("Only generate aligned loads and stores"));
144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
146edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal FinkelNoMemOpBoost("bb-vectorize-no-mem-op-boost",
147edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel  cl::init(false), cl::Hidden,
148edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel  cl::desc("Don't boost the chain-depth contribution of loads and stores"));
149edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel
150edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkelstatic cl::opt<bool>
151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelFastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden,
152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("Use a fast instruction dependency analysis"));
153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Debug-only options: this group is compiled only when NDEBUG is not defined.
// Each is a hidden boolean flag defaulting to false; per its cl::desc string it
// enables extra output for one phase (instruction examination, candidate
// selection, pair selection, cycle checking) or a dump of the basic block after
// every fused pair.
154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#ifndef NDEBUG
155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugInstructionExamination("bb-vectorize-debug-instruction-examination",
157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::init(false), cl::Hidden,
158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("When debugging is enabled, output information on the"
159de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           " instruction-examination process"));
160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCandidateSelection("bb-vectorize-debug-candidate-selection",
162de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::init(false), cl::Hidden,
163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("When debugging is enabled, output information on the"
164de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           " candidate-selection process"));
165de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
166de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugPairSelection("bb-vectorize-debug-pair-selection",
167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::init(false), cl::Hidden,
168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("When debugging is enabled, output information on the"
169de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           " pair-selection process"));
170de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool>
171de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCycleCheck("bb-vectorize-debug-cycle-check",
172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::init(false), cl::Hidden,
173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  cl::desc("When debugging is enabled, output information on the"
174de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           " cycle-checking process"));
17572465ea23d010507d3746adc126d719005981e05Hal Finkel
17672465ea23d010507d3746adc126d719005981e05Hal Finkelstatic cl::opt<bool>
17772465ea23d010507d3746adc126d719005981e05Hal FinkelPrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair",
17872465ea23d010507d3746adc126d719005981e05Hal Finkel  cl::init(false), cl::Hidden,
17972465ea23d010507d3746adc126d719005981e05Hal Finkel  cl::desc("When debugging is enabled, dump the basic block after"
18072465ea23d010507d3746adc126d719005981e05Hal Finkel           " every pair is fused"));
181de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#endif
182de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declares the pass statistic NumFusedOps with the description shown.
// NOTE(review): presumably incremented where pairs are fused; the increment
// site is not visible in this chunk.
183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSTATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
184de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
185de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelnamespace {
186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  struct BBVectorize : public BasicBlockPass {
187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    static char ID; // Pass identification, replacement for typeid
188bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng
189940371bc65570ec0add1ede4f4d9f0a41ba25e09Hongbin Zheng    const VectorizeConfig Config;
190bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng
// Constructs the pass with a copy of configuration C (a default-constructed
// VectorizeConfig when no argument is given) and calls
// initializeBBVectorizePass on the registry returned by
// PassRegistry::getPassRegistry().
// NOTE(review): this constructor does not assign the analysis pointers
// (AA, DT, SE, TD, TTI); presumably they are set elsewhere before use --
// confirm against the rest of the file.
191bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng    BBVectorize(const VectorizeConfig &C = VectorizeConfig())
192bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng      : BasicBlockPass(ID), Config(C) {
193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      initializeBBVectorizePass(*PassRegistry::getPassRegistry());
194de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Constructs the vectorizer on behalf of another pass P: copies configuration
// C and takes all analyses from P rather than from this object.
//   - AA, DT and SE come from P->getAnalysis<...>().
//   - TD comes from P->getAnalysisIfAvailable<DataLayout>()
//     (NOTE(review): presumably null when DataLayout is unavailable -- confirm
//     that users of TD check for that).
//   - TTI is set to 0 when the IgnoreTargetInfo option is on; otherwise it
//     comes from P->getAnalysis<TargetTransformInfo>().
// Unlike the single-argument constructor, this one does not call
// initializeBBVectorizePass.
196bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng    BBVectorize(Pass *P, const VectorizeConfig &C)
197bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng      : BasicBlockPass(ID), Config(C) {
19887825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng      AA = &P->getAnalysis<AliasAnalysis>();
199e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel      DT = &P->getAnalysis<DominatorTree>();
20087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng      SE = &P->getAnalysis<ScalarEvolution>();
2013574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow      TD = P->getAnalysisIfAvailable<DataLayout>();
2028bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler Carruth      TTI = IgnoreTargetInfo ? 0 : &P->getAnalysis<TargetTransformInfo>();
20387825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng    }
20487825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng
// Shorthand pair types used by the member declarations of this struct:
//   ValuePair          - two Value pointers.
//   ValuePairWithCost  - a ValuePair plus an int.
//   ValuePairWithDepth - a ValuePair plus a size_t.
//   VPPair             - a pair of ValuePairs.
//   VPPairWithType     - a VPPair plus an unsigned.
//   VPIteratorPair     - two iterators into a multimap<Value *, Value *>.
//   VPPIteratorPair    - two iterators into a multimap<ValuePair, ValuePair>.
// NOTE(review): the iterator pairs presumably denote [first, last) ranges such
// as those returned by multimap::equal_range -- confirm at the use sites.
205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    typedef std::pair<Value *, Value *> ValuePair;
20665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel    typedef std::pair<ValuePair, int> ValuePairWithCost;
207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
20972465ea23d010507d3746adc126d719005981e05Hal Finkel    typedef std::pair<VPPair, unsigned> VPPairWithType;
210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    typedef std::pair<std::multimap<Value *, Value *>::iterator,
211de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              std::multimap<Value *, Value *>::iterator> VPIteratorPair;
212de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator,
213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              std::multimap<ValuePair, ValuePair>::iterator>
214de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                VPPIteratorPair;
215de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Non-owning pointers to the analyses and target information used by the pass.
// The Pass*-taking constructor assigns all five; there TTI becomes 0 when the
// IgnoreTargetInfo option is set, and TD comes from getAnalysisIfAvailable.
// NOTE(review): the single-argument constructor leaves these unassigned.
216de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    AliasAnalysis *AA;
217e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel    DominatorTree *DT;
218de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    ScalarEvolution *SE;
2193574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow    DataLayout *TD;
220abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth    const TargetTransformInfo *TTI;
221de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
222de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // FIXME: const correct?
223de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. Takes the basic block by
// non-const reference and returns bool; NonPow2Len defaults to false.
// NOTE(review): presumably runs one pairing/fusing pass over BB and reports
// whether BB changed -- confirm against the definition.
22464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false);
225de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. Start is passed by
// non-const reference, as are all four containers, so the callee can update
// them. Returns bool.
// NOTE(review): presumably scans BB from Start, fills the containers with
// candidate pairs and their cost savings, and advances Start -- confirm.
2265d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    bool getCandidatePairs(BasicBlock &BB,
2275d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel                       BasicBlock::iterator &Start,
228de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       std::multimap<Value *, Value *> &CandidatePairs,
229a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel                       DenseSet<ValuePair> &FixedOrderPairs,
23065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                       DenseMap<ValuePair, int> &CandidatePairCostSavings,
23164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                       std::vector<Value *> &PairableInsts, bool NonPow2Len);
232de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Enumerates three kinds of connection between pairs: direct, swap and splat.
// The PairConnectionTypes parameters in the declarations of this struct map a
// VPPair to an unsigned.
// NOTE(review): presumably those unsigned values are these enumerators --
// confirm at the use sites.
23378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel    // FIXME: The current implementation does not account for pairs that
23478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel    // are connected in multiple ways. For example:
23578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel    //   C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap)
23672465ea23d010507d3746adc126d719005981e05Hal Finkel    enum PairConnectionType {
23772465ea23d010507d3746adc126d719005981e05Hal Finkel      PairConnectionDirect,
23872465ea23d010507d3746adc126d719005981e05Hal Finkel      PairConnectionSwap,
23972465ea23d010507d3746adc126d719005981e05Hal Finkel      PairConnectionSplat
24072465ea23d010507d3746adc126d719005981e05Hal Finkel    };
24172465ea23d010507d3746adc126d719005981e05Hal Finkel
// Declaration only; the body is outside this view. All five containers are
// passed by non-const reference.
// NOTE(review): presumably fills ConnectedPairs and PairConnectionTypes from
// the candidate pairs -- confirm against the definition.
242de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs,
243b1a82589339fed148c12b052d30861a539552f1aHal Finkel                       DenseSet<ValuePair> &CandidatePairsSet,
244de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       std::vector<Value *> &PairableInsts,
24572465ea23d010507d3746adc126d719005981e05Hal Finkel                       std::multimap<ValuePair, ValuePair> &ConnectedPairs,
24672465ea23d010507d3746adc126d719005981e05Hal Finkel                       DenseMap<VPPair, unsigned> &PairConnectionTypes);
247de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. The block and all three
// containers are passed by non-const reference.
// NOTE(review): presumably records dependencies among the pairable
// instructions of BB in PairableInstUsers -- confirm against the definition.
248de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void buildDepMap(BasicBlock &BB,
249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       std::multimap<Value *, Value *> &CandidatePairs,
250de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       std::vector<Value *> &PairableInsts,
251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       DenseSet<ValuePair> &PairableInstUsers);
252de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. Every argument is a
// container passed by non-const reference.
// NOTE(review): presumably selects the pairs to fuse and stores them in
// ChosenPairs -- confirm against the definition.
253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void choosePairs(std::multimap<Value *, Value *> &CandidatePairs,
254b1a82589339fed148c12b052d30861a539552f1aHal Finkel                        DenseSet<ValuePair> &CandidatePairsSet,
25565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                        DenseMap<ValuePair, int> &CandidatePairCostSavings,
256de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                        std::vector<Value *> &PairableInsts,
25786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                        DenseSet<ValuePair> &FixedOrderPairs,
25886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                        DenseMap<VPPair, unsigned> &PairConnectionTypes,
259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                        std::multimap<ValuePair, ValuePair> &ConnectedPairs,
26086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                        std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
261de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                        DenseSet<ValuePair> &PairableInstUsers,
262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                        DenseMap<Value *, Value *>& ChosenPairs);
263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. The block and all
// containers are passed by non-const reference.
// NOTE(review): presumably rewrites BB by fusing each pair in ChosenPairs --
// confirm against the definition.
264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void fuseChosenPairs(BasicBlock &BB,
265de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     std::vector<Value *> &PairableInsts,
266a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel                     DenseMap<Value *, Value *>& ChosenPairs,
26772465ea23d010507d3746adc126d719005981e05Hal Finkel                     DenseSet<ValuePair> &FixedOrderPairs,
26872465ea23d010507d3746adc126d719005981e05Hal Finkel                     DenseMap<VPPair, unsigned> &PairConnectionTypes,
26972465ea23d010507d3746adc126d719005981e05Hal Finkel                     std::multimap<ValuePair, ValuePair> &ConnectedPairs,
27072465ea23d010507d3746adc126d719005981e05Hal Finkel                     std::multimap<ValuePair, ValuePair> &ConnectedPairDeps);
27172465ea23d010507d3746adc126d719005981e05Hal Finkel
272de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. Returns bool;
// IsSimpleLoadStore is a non-const reference, so the callee can write to it.
// NOTE(review): presumably an out-parameter reporting whether I is a simple
// load or store -- confirm against the definition.
273de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
274de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. Returns bool; CostSavings
// and FixedOrder are non-const int references, so the callee can write to them.
// NOTE(review): presumably both are out-parameters describing the pair (I, J)
// -- confirm their meaning and value range against the definition.
275de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    bool areInstsCompatible(Instruction *I, Instruction *J,
27665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                       bool IsSimpleLoadStore, bool NonPow2Len,
277a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel                       int &CostSavings, int &FixedOrder);
278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. Returns bool. Users and
// WriteSet are passed by non-const reference. UpdateUsers defaults to true and
// LoadMoveSetPairs defaults to 0 (a null pointer).
// NOTE(review): presumably reports whether J uses a value tracked in Users or
// conflicts with WriteSet -- confirm against the definition.
279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    bool trackUsesOfI(DenseSet<Value *> &Users,
280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      AliasSetTracker &WriteSet, Instruction *I,
281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      Instruction *J, bool UpdateUsers = true,
2822f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                      DenseSet<ValuePair> *LoadMoveSetPairs = 0);
2831230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop
// Declaration only; the body is outside this view. The containers are passed
// by non-const reference and the pair P is passed by value.
// NOTE(review): presumably adds to ConnectedPairs / PairConnectionTypes the
// pairs connected to P -- confirm against the definition.
284de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void computePairsConnectedTo(
285de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<Value *, Value *> &CandidatePairs,
28600f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel                      DenseSet<ValuePair> &CandidatePairsSet,
287de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::vector<Value *> &PairableInsts,
288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairs,
28972465ea23d010507d3746adc126d719005981e05Hal Finkel                      DenseMap<VPPair, unsigned> &PairConnectionTypes,
290de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      ValuePair P);
291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. Returns bool for the two
// pairs P and Q (both passed by value). The last two parameters are optional
// pointers that default to 0.
// NOTE(review): presumably the optional containers are filled only when
// non-null -- confirm against the definition.
292de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    bool pairsConflict(ValuePair P, ValuePair Q,
293de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                 DenseSet<ValuePair> &PairableInstUsers,
294da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                 std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0,
295da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                 DenseSet<VPPair> *PairableInstUserPairSet = 0);
296de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. Returns bool for pair P
// (passed by value); both containers are passed by non-const reference.
// NOTE(review): presumably reports whether adding P to CurrentPairs would
// create a dependency cycle -- confirm against the definition.
297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    bool pairWillFormCycle(ValuePair P,
298de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       std::multimap<ValuePair, ValuePair> &PairableInstUsers,
299de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       DenseSet<ValuePair> &CurrentPairs);
300de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. All containers are passed
// by non-const reference; the pair J is passed by value and UseCycleCheck is a
// plain bool.
// NOTE(review): presumably reduces Tree (rooted at J) to a conflict-free
// subset stored in PrunedTree -- confirm against the definition.
301de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void pruneTreeFor(
302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<Value *, Value *> &CandidatePairs,
303de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::vector<Value *> &PairableInsts,
304de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairs,
305de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &PairableInstUsers,
306de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
307da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                      DenseSet<VPPair> &PairableInstUserPairSet,
308de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseMap<Value *, Value *> &ChosenPairs,
309de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseMap<ValuePair, size_t> &Tree,
310de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &PrunedTree, ValuePair J,
311de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      bool UseCycleCheck);
312de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. All containers are passed
// by non-const reference; the pair J is passed by value. Tree maps a ValuePair
// to a size_t.
// NOTE(review): presumably builds in Tree the pairs reachable from J, with the
// size_t being a depth -- confirm against the definition.
313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void buildInitialTreeFor(
314de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<Value *, Value *> &CandidatePairs,
315b1a82589339fed148c12b052d30861a539552f1aHal Finkel                      DenseSet<ValuePair> &CandidatePairsSet,
316de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::vector<Value *> &PairableInsts,
317de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairs,
318de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &PairableInstUsers,
319de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseMap<Value *, Value *> &ChosenPairs,
320de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseMap<ValuePair, size_t> &Tree, ValuePair J);
321de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Declaration only; the body is outside this view. All containers, plus
// BestMaxDepth and BestEffSize, are passed by non-const reference, so the
// callee can update them. ChoiceRange (a pair of multimap iterators) and
// UseCycleCheck are passed by value.
// NOTE(review): presumably evaluates the candidates in ChoiceRange and leaves
// the best tree and its metrics in BestTree / BestMaxDepth / BestEffSize --
// confirm against the definition.
322de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void findBestTreeFor(
323de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<Value *, Value *> &CandidatePairs,
324b1a82589339fed148c12b052d30861a539552f1aHal Finkel                      DenseSet<ValuePair> &CandidatePairsSet,
32565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                      DenseMap<ValuePair, int> &CandidatePairCostSavings,
326de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::vector<Value *> &PairableInsts,
32786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                      DenseSet<ValuePair> &FixedOrderPairs,
32886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                      DenseMap<VPPair, unsigned> &PairConnectionTypes,
329de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairs,
33086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
331de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &PairableInstUsers,
332de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
333da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                      DenseSet<VPPair> &PairableInstUserPairSet,
334de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseMap<Value *, Value *> &ChosenPairs,
335de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
33665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                      int &BestEffSize, VPIteratorPair ChoiceRange,
337de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      bool UseCycleCheck);
338de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
339de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
340202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel                     Instruction *J, unsigned o);
341de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
34364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                     unsigned MaskOffset, unsigned NumInElem,
34464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                     unsigned NumInElem1, unsigned IdxOffset,
34564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                     std::vector<Constant*> &Mask);
346de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I,
348de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *J);
349de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
35064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J,
35164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                       unsigned o, Value *&LOp, unsigned numElemL,
35272465ea23d010507d3746adc126d719005981e05Hal Finkel                       Type *ArgTypeL, Type *ArgTypeR, bool IBeforeJ,
35364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                       unsigned IdxOff = 0);
35464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
355de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Value *getReplacementInput(LLVMContext& Context, Instruction *I,
35672465ea23d010507d3746adc126d719005981e05Hal Finkel                     Instruction *J, unsigned o, bool IBeforeJ);
357de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
358de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
35972465ea23d010507d3746adc126d719005981e05Hal Finkel                     Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
36072465ea23d010507d3746adc126d719005981e05Hal Finkel                     bool IBeforeJ);
361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *J, Instruction *K,
364de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *&InsertionPt, Instruction *&K1,
365202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel                     Instruction *&K2);
366de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void collectPairLoadMoveSet(BasicBlock &BB,
368de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     DenseMap<Value *, Value *> &ChosenPairs,
369de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     std::multimap<Value *, Value *> &LoadMoveSet,
3702f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                     DenseSet<ValuePair> &LoadMoveSetPairs,
371de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *I);
372de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void collectLoadMoveSet(BasicBlock &BB,
374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     std::vector<Value *> &PairableInsts,
375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     DenseMap<Value *, Value *> &ChosenPairs,
3762f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                     std::multimap<Value *, Value *> &LoadMoveSet,
3772f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                     DenseSet<ValuePair> &LoadMoveSetPairs);
378de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    bool canMoveUsesOfIAfterJ(BasicBlock &BB,
3802f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                     DenseSet<ValuePair> &LoadMoveSetPairs,
381de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *I, Instruction *J);
382de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
383de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    void moveUsesOfIAfterJ(BasicBlock &BB,
3842f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                     DenseSet<ValuePair> &LoadMoveSetPairs,
385de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *&InsertionPt,
386de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *I, Instruction *J);
387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
388ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel    void combineMetadata(Instruction *K, const Instruction *J);
389ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel
39087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng    bool vectorizeBB(BasicBlock &BB) {
391e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel      if (!DT->isReachableFromEntry(&BB)) {
392e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel        DEBUG(dbgs() << "BBV: skipping unreachable " << BB.getName() <<
393e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel              " in " << BB.getParent()->getName() << "\n");
394e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel        return false;
395e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel      }
396e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel
397abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth      DEBUG(if (TTI) dbgs() << "BBV: using target information\n");
39865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel
399de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      bool changed = false;
400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Iterate a sufficient number of times to merge types of size 1 bit,
401de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // then 2 bits, then 4, etc. up to half of the target vector width of the
402de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // target vector register.
40364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      unsigned n = 1;
40464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      for (unsigned v = 2;
405abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth           (TTI || v <= Config.VectorBits) &&
40665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel           (!Config.MaxIter || n <= Config.MaxIter);
407de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           v *= 2, ++n) {
408bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng        DEBUG(dbgs() << "BBV: fusing loop #" << n <<
409de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              " for " << BB.getName() << " in " <<
410de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              BB.getParent()->getName() << "...\n");
411de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        if (vectorizePairs(BB))
412de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          changed = true;
413de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        else
414de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          break;
415de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
416de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
41764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      if (changed && !Pow2LenOnly) {
41864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        ++n;
41964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        for (; !Config.MaxIter || n <= Config.MaxIter; ++n) {
42064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " <<
42164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                n << " for " << BB.getName() << " in " <<
42264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                BB.getParent()->getName() << "...\n");
42364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          if (!vectorizePairs(BB, true)) break;
42464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
42564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      }
42664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
427de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DEBUG(dbgs() << "BBV: done!\n");
428de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      return changed;
429de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
430de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
43187825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng    virtual bool runOnBasicBlock(BasicBlock &BB) {
43287825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng      AA = &getAnalysis<AliasAnalysis>();
433e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel      DT = &getAnalysis<DominatorTree>();
43487825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng      SE = &getAnalysis<ScalarEvolution>();
4353574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow      TD = getAnalysisIfAvailable<DataLayout>();
4368bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler Carruth      TTI = IgnoreTargetInfo ? 0 : &getAnalysis<TargetTransformInfo>();
43787825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng
43887825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng      return vectorizeBB(BB);
43987825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng    }
44087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng
441de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
442de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      BasicBlockPass::getAnalysisUsage(AU);
443de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      AU.addRequired<AliasAnalysis>();
444e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel      AU.addRequired<DominatorTree>();
445de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      AU.addRequired<ScalarEvolution>();
4468bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler Carruth      AU.addRequired<TargetTransformInfo>();
447de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      AU.addPreserved<AliasAnalysis>();
448e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel      AU.addPreserved<DominatorTree>();
449de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      AU.addPreserved<ScalarEvolution>();
4507e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel      AU.setPreservesCFG();
451de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
452de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
45364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) {
45464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() &&
45564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel             "Cannot form vector from incompatible scalar types");
45664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      Type *STy = ElemTy->getScalarType();
45764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
45864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      unsigned numElem;
459de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
46064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        numElem = VTy->getNumElements();
46164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      } else {
46264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        numElem = 1;
46364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      }
46464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
46564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      if (VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) {
46664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        numElem += VTy->getNumElements();
46764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      } else {
46864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        numElem += 1;
469de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
4707e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel
47164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      return VectorType::get(STy, numElem);
47264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    }
47364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
47464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    static inline void getInstructionTypes(Instruction *I,
47564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                           Type *&T1, Type *&T2) {
4763fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
47764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // For stores, it is the value type, not the pointer type that matters
47864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // because the value is what will come from a vector register.
47964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
4803fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel        Value *IVal = SI->getValueOperand();
48164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        T1 = IVal->getType();
48264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      } else {
48364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        T1 = I->getType();
48464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      }
48564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
4863fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel      if (CastInst *CI = dyn_cast<CastInst>(I))
4873fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel        T2 = CI->getSrcTy();
48864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      else
48964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        T2 = T1;
49065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel
49165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
49265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        T2 = SI->getCondition()->getType();
4938b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel      } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) {
4948b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel        T2 = SI->getOperand(0)->getType();
4955094257518ea7b615d87ef5bea657625ffa81991Hal Finkel      } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
4965094257518ea7b615d87ef5bea657625ffa81991Hal Finkel        T2 = CI->getOperand(0)->getType();
49765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      }
498de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
499de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
500de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Returns the weight associated with the provided value. A chain of
501de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // candidate pairs has a length given by the sum of the weights of its
502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // members (one weight per pair; the weight of each member of the pair
503de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // is assumed to be the same). This length is then compared to the
504de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // chain-length threshold to determine if a given chain is significant
505de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // enough to be vectorized. The length is also used in comparing
506de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // candidate chains where longer chains are considered to be better.
507de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Note: when this function returns 0, the resulting instructions are
508de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // not actually fused.
509bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng    inline size_t getDepthFactor(Value *V) {
510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // InsertElement and ExtractElement have a depth factor of zero. This is
511de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // for two reasons: First, they cannot be usefully fused. Second, because
512de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // the pass generates a lot of these, they can confuse the simple metric
513de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // used to compare the trees in the next iteration. Thus, giving them a
514de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // weight of zero allows the pass to essentially ignore them in
515de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // subsequent iterations when looking for vectorization opportunities
516de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // while still tracking dependency chains that flow through those
517de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // instructions.
518de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V))
519de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        return 0;
520de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
521edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel      // Give a load or store half of the required depth so that load/store
522edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel      // pairs will vectorize.
523bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng      if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
524bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng        return Config.ReqChainDepth/2;
525edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel
526de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      return 1;
527de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
528de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
529abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth    // Returns the cost of the provided instruction using TTI.
53046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel    // This does not handle loads and stores.
53146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel    unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) {
53246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      switch (Opcode) {
53346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      default: break;
53446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::GetElementPtr:
53546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel        // We mark this instruction as zero-cost because scalar GEPs are usually
53646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel        // lowered to the intruction addressing mode. At the moment we don't
53746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel        // generate vector GEPs.
53846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel        return 0;
53946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::Br:
540abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth        return TTI->getCFInstrCost(Opcode);
54146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::PHI:
54246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel        return 0;
54346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::Add:
54446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::FAdd:
54546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::Sub:
54646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::FSub:
54746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::Mul:
54846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::FMul:
54946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::UDiv:
55046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::SDiv:
55146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::FDiv:
55246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::URem:
55346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::SRem:
55446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::FRem:
55546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::Shl:
55646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::LShr:
55746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::AShr:
55846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::And:
55946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::Or:
56046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::Xor:
561abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth        return TTI->getArithmeticInstrCost(Opcode, T1);
56246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::Select:
56346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::ICmp:
56446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::FCmp:
565abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth        return TTI->getCmpSelInstrCost(Opcode, T1, T2);
56646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::ZExt:
56746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::SExt:
56846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::FPToUI:
56946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::FPToSI:
57046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::FPExt:
57146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::PtrToInt:
57246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::IntToPtr:
57346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::SIToFP:
57446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::UIToFP:
57546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::Trunc:
57646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::FPTrunc:
57746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      case Instruction::BitCast:
57886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel      case Instruction::ShuffleVector:
579abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth        return TTI->getCastInstrCost(Opcode, T1, T2);
58046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      }
58146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel
58246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      return 1;
58346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel    }
58446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel
585de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // This determines the relative offset of two loads or stores, returning
586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // true if the offset could be determined to be some constant value.
587de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // For example, if OffsetInElmts == 1, then J accesses the memory directly
588de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // after I; if OffsetInElmts == -1 then I accesses the memory
58964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // directly after J.
590de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    bool getPairPtrInfo(Instruction *I, Instruction *J,
591de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
59265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        unsigned &IAddressSpace, unsigned &JAddressSpace,
59393f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel        int64_t &OffsetInElmts, bool ComputeOffset = true) {
594de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      OffsetInElmts = 0;
59565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
59665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        LoadInst *LJ = cast<LoadInst>(J);
59765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        IPtr = LI->getPointerOperand();
59865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        JPtr = LJ->getPointerOperand();
59965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        IAlignment = LI->getAlignment();
60065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        JAlignment = LJ->getAlignment();
60165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        IAddressSpace = LI->getPointerAddressSpace();
60265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        JAddressSpace = LJ->getPointerAddressSpace();
603de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      } else {
60465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J);
60565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        IPtr = SI->getPointerOperand();
60665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        JPtr = SJ->getPointerOperand();
60765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        IAlignment = SI->getAlignment();
60865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        JAlignment = SJ->getAlignment();
60965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        IAddressSpace = SI->getPointerAddressSpace();
61065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        JAddressSpace = SJ->getPointerAddressSpace();
611de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
612de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
61393f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel      if (!ComputeOffset)
61493f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel        return true;
61593f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel
616de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
618de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
619de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // If this is a trivial offset, then we'll get something like
620de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // 1*sizeof(type). With target data, which we need anyway, this will get
621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // constant folded into a number.
622de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV);
623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (const SCEVConstant *ConstOffSCEV =
624de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            dyn_cast<SCEVConstant>(OffsetSCEV)) {
625de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        ConstantInt *IntOff = ConstOffSCEV->getValue();
626de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        int64_t Offset = IntOff->getSExtValue();
627de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
628de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        Type *VTy = cast<PointerType>(IPtr->getType())->getElementType();
629de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy);
630de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
63164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType();
63264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        if (VTy != VTy2 && Offset < 0) {
63364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2);
63464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          OffsetInElmts = Offset/VTy2TSS;
63564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          return (abs64(Offset) % VTy2TSS) == 0;
63664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
638de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        OffsetInElmts = Offset/VTyTSS;
639de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        return (abs64(Offset) % VTyTSS) == 0;
640de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
641de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
642de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      return false;
643de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
644de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
645de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Returns true if the provided CallInst represents an intrinsic that can
646de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // be vectorized.
647de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    bool isVectorizableIntrinsic(CallInst* I) {
648de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      Function *F = I->getCalledFunction();
649de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (!F) return false;
650de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
651a77728415857196035c0090f7b2749d7971811a2Hal Finkel      Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
652de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (!IID) return false;
653de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
654de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      switch(IID) {
655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      default:
656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        return false;
657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      case Intrinsic::sqrt:
658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      case Intrinsic::powi:
659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      case Intrinsic::sin:
660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      case Intrinsic::cos:
661de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      case Intrinsic::log:
662de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      case Intrinsic::log2:
663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      case Intrinsic::log10:
664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      case Intrinsic::exp:
665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      case Intrinsic::exp2:
666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      case Intrinsic::pow:
66786312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng        return Config.VectorizeMath;
668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      case Intrinsic::fma:
66964a7a24edf719bb6ffacc030c23f4cd99312f3fbHal Finkel      case Intrinsic::fmuladd:
67086312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng        return Config.VectorizeFMA;
671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
672de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
673de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
674b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel    bool isPureIEChain(InsertElementInst *IE) {
675b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel      InsertElementInst *IENext = IE;
676b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel      do {
677b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel        if (!isa<UndefValue>(IENext->getOperand(0)) &&
678b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel            !isa<InsertElementInst>(IENext->getOperand(0))) {
679b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel          return false;
680b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel        }
681b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel      } while ((IENext =
682b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel                 dyn_cast<InsertElementInst>(IENext->getOperand(0))));
683b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel
684b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel      return true;
685b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel    }
686de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  };
687de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
688de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // This function implements one vectorization iteration on the provided
689de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // basic block. It returns true if the block is changed.
69064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel  bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) {
6915d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    bool ShouldContinue;
6925d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    BasicBlock::iterator Start = BB.getFirstInsertionPt();
6935d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
6945d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    std::vector<Value *> AllPairableInsts;
6955d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    DenseMap<Value *, Value *> AllChosenPairs;
696a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel    DenseSet<ValuePair> AllFixedOrderPairs;
69772465ea23d010507d3746adc126d719005981e05Hal Finkel    DenseMap<VPPair, unsigned> AllPairConnectionTypes;
69872465ea23d010507d3746adc126d719005981e05Hal Finkel    std::multimap<ValuePair, ValuePair> AllConnectedPairs, AllConnectedPairDeps;
6995d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
7005d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    do {
7015d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      std::vector<Value *> PairableInsts;
7025d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      std::multimap<Value *, Value *> CandidatePairs;
703a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel      DenseSet<ValuePair> FixedOrderPairs;
70465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      DenseMap<ValuePair, int> CandidatePairCostSavings;
7055d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
706a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel                                         FixedOrderPairs,
70765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                                         CandidatePairCostSavings,
70864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                         PairableInsts, NonPow2Len);
7095d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      if (PairableInsts.empty()) continue;
7103706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop
711b1a82589339fed148c12b052d30861a539552f1aHal Finkel      // Build the candidate pair set for faster lookups.
712b1a82589339fed148c12b052d30861a539552f1aHal Finkel      DenseSet<ValuePair> CandidatePairsSet;
713b1a82589339fed148c12b052d30861a539552f1aHal Finkel      for (std::multimap<Value *, Value *>::iterator I = CandidatePairs.begin(),
714b1a82589339fed148c12b052d30861a539552f1aHal Finkel           E = CandidatePairs.end(); I != E; ++I)
715b1a82589339fed148c12b052d30861a539552f1aHal Finkel        CandidatePairsSet.insert(*I);
716b1a82589339fed148c12b052d30861a539552f1aHal Finkel
7175d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      // Now we have a map of all of the pairable instructions and we need to
7185d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      // select the best possible pairing. A good pairing is one such that the
7195d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      // users of the pair are also paired. This defines a (directed) forest
72094c22716d60ff5edf6a98a3c67e0faa001be1142Sylvestre Ledru      // over the pairs such that two pairs are connected iff the second pair
7215d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      // uses the first.
7223706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop
7235d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      // Note that it only matters that both members of the second pair use some
7245d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      // element of the first pair (to allow for splatting).
7253706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop
72672465ea23d010507d3746adc126d719005981e05Hal Finkel      std::multimap<ValuePair, ValuePair> ConnectedPairs, ConnectedPairDeps;
72772465ea23d010507d3746adc126d719005981e05Hal Finkel      DenseMap<VPPair, unsigned> PairConnectionTypes;
728b1a82589339fed148c12b052d30861a539552f1aHal Finkel      computeConnectedPairs(CandidatePairs, CandidatePairsSet,
729b1a82589339fed148c12b052d30861a539552f1aHal Finkel                            PairableInsts, ConnectedPairs, PairConnectionTypes);
7305d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      if (ConnectedPairs.empty()) continue;
7313706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop
73272465ea23d010507d3746adc126d719005981e05Hal Finkel      for (std::multimap<ValuePair, ValuePair>::iterator
73372465ea23d010507d3746adc126d719005981e05Hal Finkel           I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
73472465ea23d010507d3746adc126d719005981e05Hal Finkel           I != IE; ++I) {
73572465ea23d010507d3746adc126d719005981e05Hal Finkel        ConnectedPairDeps.insert(VPPair(I->second, I->first));
73672465ea23d010507d3746adc126d719005981e05Hal Finkel      }
73772465ea23d010507d3746adc126d719005981e05Hal Finkel
7385d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      // Build the pairable-instruction dependency map
7395d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      DenseSet<ValuePair> PairableInstUsers;
7405d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
7413706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop
74235564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel      // There is now a graph of the connected pairs. For each variable, pick
74335564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel      // the pairing with the largest tree meeting the depth requirement on at
74435564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel      // least one branch. Then select all pairings that are part of that tree
74535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel      // and remove them from the list of available pairings and pairable
74635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel      // variables.
7473706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop
7485d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      DenseMap<Value *, Value *> ChosenPairs;
749b1a82589339fed148c12b052d30861a539552f1aHal Finkel      choosePairs(CandidatePairs, CandidatePairsSet,
750b1a82589339fed148c12b052d30861a539552f1aHal Finkel        CandidatePairCostSavings,
75186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel        PairableInsts, FixedOrderPairs, PairConnectionTypes,
75286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel        ConnectedPairs, ConnectedPairDeps,
7535d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel        PairableInstUsers, ChosenPairs);
7543706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop
7555d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      if (ChosenPairs.empty()) continue;
7565d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
7575d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel                              PairableInsts.end());
7585d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end());
759a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel
76072465ea23d010507d3746adc126d719005981e05Hal Finkel      // Only for the chosen pairs, propagate information on fixed-order pairs,
76172465ea23d010507d3746adc126d719005981e05Hal Finkel      // pair connections, and their types to the data structures used by the
76272465ea23d010507d3746adc126d719005981e05Hal Finkel      // pair fusion procedures.
763a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel      for (DenseMap<Value *, Value *>::iterator I = ChosenPairs.begin(),
764a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel           IE = ChosenPairs.end(); I != IE; ++I) {
765a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel        if (FixedOrderPairs.count(*I))
766a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel          AllFixedOrderPairs.insert(*I);
767a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel        else if (FixedOrderPairs.count(ValuePair(I->second, I->first)))
768a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel          AllFixedOrderPairs.insert(ValuePair(I->second, I->first));
76972465ea23d010507d3746adc126d719005981e05Hal Finkel
77072465ea23d010507d3746adc126d719005981e05Hal Finkel        for (DenseMap<Value *, Value *>::iterator J = ChosenPairs.begin();
77172465ea23d010507d3746adc126d719005981e05Hal Finkel             J != IE; ++J) {
77272465ea23d010507d3746adc126d719005981e05Hal Finkel          DenseMap<VPPair, unsigned>::iterator K =
77372465ea23d010507d3746adc126d719005981e05Hal Finkel            PairConnectionTypes.find(VPPair(*I, *J));
77472465ea23d010507d3746adc126d719005981e05Hal Finkel          if (K != PairConnectionTypes.end()) {
77572465ea23d010507d3746adc126d719005981e05Hal Finkel            AllPairConnectionTypes.insert(*K);
77672465ea23d010507d3746adc126d719005981e05Hal Finkel          } else {
77772465ea23d010507d3746adc126d719005981e05Hal Finkel            K = PairConnectionTypes.find(VPPair(*J, *I));
77872465ea23d010507d3746adc126d719005981e05Hal Finkel            if (K != PairConnectionTypes.end())
77972465ea23d010507d3746adc126d719005981e05Hal Finkel              AllPairConnectionTypes.insert(*K);
78072465ea23d010507d3746adc126d719005981e05Hal Finkel          }
78172465ea23d010507d3746adc126d719005981e05Hal Finkel        }
78272465ea23d010507d3746adc126d719005981e05Hal Finkel      }
78372465ea23d010507d3746adc126d719005981e05Hal Finkel
78472465ea23d010507d3746adc126d719005981e05Hal Finkel      for (std::multimap<ValuePair, ValuePair>::iterator
78572465ea23d010507d3746adc126d719005981e05Hal Finkel           I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
78672465ea23d010507d3746adc126d719005981e05Hal Finkel           I != IE; ++I) {
78772465ea23d010507d3746adc126d719005981e05Hal Finkel        if (AllPairConnectionTypes.count(*I)) {
78872465ea23d010507d3746adc126d719005981e05Hal Finkel          AllConnectedPairs.insert(*I);
78972465ea23d010507d3746adc126d719005981e05Hal Finkel          AllConnectedPairDeps.insert(VPPair(I->second, I->first));
79072465ea23d010507d3746adc126d719005981e05Hal Finkel        }
791a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel      }
7925d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    } while (ShouldContinue);
7935d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
7945d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    if (AllChosenPairs.empty()) return false;
7955d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    NumFusedOps += AllChosenPairs.size();
7963706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop
797de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // A set of pairs has now been selected. It is now necessary to replace the
798de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // paired instructions with vector instructions. For this procedure each
79943ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop    // operand must be replaced with a vector operand. This vector is formed
800de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // by using build_vector on the old operands. The replaced values are then
801de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // replaced with a vector_extract on the result.  Subsequent optimization
802de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // passes should coalesce the build/extract combinations.
8033706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop
80472465ea23d010507d3746adc126d719005981e05Hal Finkel    fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs,
80572465ea23d010507d3746adc126d719005981e05Hal Finkel                    AllPairConnectionTypes,
80672465ea23d010507d3746adc126d719005981e05Hal Finkel                    AllConnectedPairs, AllConnectedPairDeps);
80764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
80864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // It is important to cleanup here so that future iterations of this
80964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // function have less work to do.
8108e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer    (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo());
811de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    return true;
812de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
813de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
814de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // This function returns true if the provided instruction is capable of being
815de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // fused into a vector instruction. This determination is based only on the
816de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // type and other attributes of the instruction.
817de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  bool BBVectorize::isInstVectorizable(Instruction *I,
818de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                                         bool &IsSimpleLoadStore) {
819de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    IsSimpleLoadStore = false;
820de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
821de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    if (CallInst *C = dyn_cast<CallInst>(I)) {
822de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (!isVectorizableIntrinsic(C))
823de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        return false;
824de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
825de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Vectorize simple loads if possbile:
826de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      IsSimpleLoadStore = L->isSimple();
82786312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng      if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
828de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        return false;
829de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
830de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Vectorize simple stores if possbile:
831de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      IsSimpleLoadStore = S->isSimple();
83286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng      if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
833de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        return false;
834de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    } else if (CastInst *C = dyn_cast<CastInst>(I)) {
835de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // We can vectorize casts, but not casts of pointer types, etc.
83686312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng      if (!Config.VectorizeCasts)
837de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        return false;
838de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
839de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      Type *SrcTy = C->getSrcTy();
840f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel      if (!SrcTy->isSingleValueType())
841de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        return false;
842de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
843de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      Type *DestTy = C->getDestTy();
844f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel      if (!DestTy->isSingleValueType())
845de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        return false;
846fc3665c87519850f629c9565535e3be447e10addHal Finkel    } else if (isa<SelectInst>(I)) {
847fc3665c87519850f629c9565535e3be447e10addHal Finkel      if (!Config.VectorizeSelect)
848fc3665c87519850f629c9565535e3be447e10addHal Finkel        return false;
849e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel    } else if (isa<CmpInst>(I)) {
850e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel      if (!Config.VectorizeCmp)
851e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel        return false;
852f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) {
853f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel      if (!Config.VectorizeGEP)
854f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel        return false;
855f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel
856f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel      // Currently, vector GEPs exist only with one index.
857f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel      if (G->getNumIndices() != 1)
858f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel        return false;
859de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) ||
860de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) {
861de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      return false;
862de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
863de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
864de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // We can't vectorize memory operations without target data
865de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    if (TD == 0 && IsSimpleLoadStore)
866de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      return false;
867de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
868de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Type *T1, *T2;
86964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    getInstructionTypes(I, T1, T2);
870de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
871de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Not every type can be vectorized...
872de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) ||
873de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
874de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      return false;
875de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
87665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel    if (T1->getScalarSizeInBits() == 1) {
877768edf3cd037aab10391abc279f71470df8e3156Hal Finkel      if (!Config.VectorizeBools)
878768edf3cd037aab10391abc279f71470df8e3156Hal Finkel        return false;
879768edf3cd037aab10391abc279f71470df8e3156Hal Finkel    } else {
88065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      if (!Config.VectorizeInts && T1->isIntOrIntVectorTy())
881768edf3cd037aab10391abc279f71470df8e3156Hal Finkel        return false;
882768edf3cd037aab10391abc279f71470df8e3156Hal Finkel    }
88365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel
88465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel    if (T2->getScalarSizeInBits() == 1) {
88565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      if (!Config.VectorizeBools)
88665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        return false;
88765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel    } else {
88865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      if (!Config.VectorizeInts && T2->isIntOrIntVectorTy())
88965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        return false;
89065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel    }
89165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel
89286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng    if (!Config.VectorizeFloats
89386312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng        && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
894de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      return false;
895de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
896e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel    // Don't vectorize target-specific types.
897e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel    if (T1->isX86_FP80Ty() || T1->isPPC_FP128Ty() || T1->isX86_MMXTy())
898e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel      return false;
899e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel    if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy())
900e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel      return false;
901e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel
90205bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel    if ((!Config.VectorizePointers || TD == 0) &&
90305bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel        (T1->getScalarType()->isPointerTy() ||
90405bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel         T2->getScalarType()->isPointerTy()))
905f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel      return false;
906f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel
907abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth    if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
908abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth                 T2->getPrimitiveSizeInBits() >= Config.VectorBits))
909de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      return false;
910de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
911de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    return true;
912de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
913de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
914de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // This function returns true if the two provided instructions are compatible
915de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // (meaning that they can be fused into a vector instruction). This assumes
916de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // that I has already been determined to be vectorizable and that J is not
917de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // in the use tree of I.
918de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
91965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                       bool IsSimpleLoadStore, bool NonPow2Len,
920a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel                       int &CostSavings, int &FixedOrder) {
921de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I <<
922de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     " <-> " << *J << "\n");
923de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
92465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel    CostSavings = 0;
925a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel    FixedOrder = 0;
92665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel
927de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Loads and stores can be merged if they have different alignments,
928de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // but are otherwise the same.
92964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment |
93064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                      (NonPow2Len ? Instruction::CompareUsingScalarTypes : 0)))
93164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      return false;
93264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
93364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *IT1, *IT2, *JT1, *JT2;
93464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    getInstructionTypes(I, IT1, IT2);
93564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    getInstructionTypes(J, JT1, JT2);
93664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    unsigned MaxTypeBits = std::max(
93764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(),
93864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits());
939abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth    if (!TTI && MaxTypeBits > Config.VectorBits)
940de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      return false;
941ec4e85e3364f50802f2007e4b1e23661d4610366Hal Finkel
942de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // FIXME: handle addsub-type operations!
943de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
944de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    if (IsSimpleLoadStore) {
945de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      Value *IPtr, *JPtr;
94665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
947de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      int64_t OffsetInElmts = 0;
948de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
94965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel            IAddressSpace, JAddressSpace,
950de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            OffsetInElmts) && abs64(OffsetInElmts) == 1) {
951a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel        FixedOrder = (int) OffsetInElmts;
95265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        unsigned BottomAlignment = IAlignment;
95365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        if (OffsetInElmts < 0) BottomAlignment = JAlignment;
95465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel
95565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        Type *aTypeI = isa<StoreInst>(I) ?
95665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel          cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
95765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        Type *aTypeJ = isa<StoreInst>(J) ?
95865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel          cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
95965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
96064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
96165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        if (Config.AlignedOnly) {
962de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          // An aligned load or store is possible only if the instruction
963de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          // with the lower offset has an alignment suitable for the
964de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          // vector type.
9651230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop
966de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          unsigned VecAlignment = TD->getPrefTypeAlignment(VType);
967de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          if (BottomAlignment < VecAlignment)
968de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            return false;
969de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
97065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel
971abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth        if (TTI) {
972abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth          unsigned ICost = TTI->getMemoryOpCost(I->getOpcode(), aTypeI,
973abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth                                                IAlignment, IAddressSpace);
974abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth          unsigned JCost = TTI->getMemoryOpCost(J->getOpcode(), aTypeJ,
975abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth                                                JAlignment, JAddressSpace);
976abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth          unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType,
977abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth                                                BottomAlignment,
978abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth                                                IAddressSpace);
9790cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel
9800cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel          ICost += TTI->getAddressComputationCost(aTypeI);
9810cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel          JCost += TTI->getAddressComputationCost(aTypeJ);
9820cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel          VCost += TTI->getAddressComputationCost(VType);
9830cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel
98465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel          if (VCost > ICost + JCost)
98565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel            return false;
98682149a9106f221aa6a7271977c236b078e621f21Hal Finkel
987dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel          // We don't want to fuse to a type that will be split, even
98882149a9106f221aa6a7271977c236b078e621f21Hal Finkel          // if the two input types will also be split and there is no other
989dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel          // associated cost.
990abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth          unsigned VParts = TTI->getNumberOfParts(VType);
991dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel          if (VParts > 1)
992dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel            return false;
993dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel          else if (!VParts && VCost == ICost + JCost)
99482149a9106f221aa6a7271977c236b078e621f21Hal Finkel            return false;
99582149a9106f221aa6a7271977c236b078e621f21Hal Finkel
99665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel          CostSavings = ICost + JCost - VCost;
99765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        }
998de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      } else {
999de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        return false;
1000de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
1001abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth    } else if (TTI) {
100246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2);
100346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2);
100465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      Type *VT1 = getVecTypeForPair(IT1, JT1),
100565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel           *VT2 = getVecTypeForPair(IT2, JT2);
1006ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel
1007ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel      // Note that this procedure is incorrect for insert and extract element
1008ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel      // instructions (because combining these often results in a shuffle),
1009ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel      // but this cost is ignored (because insert and extract element
1010ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel      // instructions are assigned a zero depth factor and are not really
1011ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel      // fused in general).
101246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel      unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2);
101365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel
101465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      if (VCost > ICost + JCost)
101565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        return false;
101682149a9106f221aa6a7271977c236b078e621f21Hal Finkel
1017dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel      // We don't want to fuse to a type that will be split, even
101882149a9106f221aa6a7271977c236b078e621f21Hal Finkel      // if the two input types will also be split and there is no other
1019dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel      // associated cost.
1020abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth      unsigned VParts1 = TTI->getNumberOfParts(VT1),
1021abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth               VParts2 = TTI->getNumberOfParts(VT2);
10228b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel      if (VParts1 > 1 || VParts2 > 1)
1023dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel        return false;
10248b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel      else if ((!VParts1 || !VParts2) && VCost == ICost + JCost)
102582149a9106f221aa6a7271977c236b078e621f21Hal Finkel        return false;
102682149a9106f221aa6a7271977c236b078e621f21Hal Finkel
102765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      CostSavings = ICost + JCost - VCost;
1028de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
1029de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
10306173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel    // The powi intrinsic is special because only the first argument is
10316173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel    // vectorized, the second arguments must be equal.
10326173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel    CallInst *CI = dyn_cast<CallInst>(I);
10336173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel    Function *FI;
1034a77728415857196035c0090f7b2749d7971811a2Hal Finkel    if (CI && (FI = CI->getCalledFunction())) {
1035a77728415857196035c0090f7b2749d7971811a2Hal Finkel      Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID();
1036a77728415857196035c0090f7b2749d7971811a2Hal Finkel      if (IID == Intrinsic::powi) {
1037a77728415857196035c0090f7b2749d7971811a2Hal Finkel        Value *A1I = CI->getArgOperand(1),
1038a77728415857196035c0090f7b2749d7971811a2Hal Finkel              *A1J = cast<CallInst>(J)->getArgOperand(1);
1039a77728415857196035c0090f7b2749d7971811a2Hal Finkel        const SCEV *A1ISCEV = SE->getSCEV(A1I),
1040a77728415857196035c0090f7b2749d7971811a2Hal Finkel                   *A1JSCEV = SE->getSCEV(A1J);
1041a77728415857196035c0090f7b2749d7971811a2Hal Finkel        return (A1ISCEV == A1JSCEV);
1042a77728415857196035c0090f7b2749d7971811a2Hal Finkel      }
1043a77728415857196035c0090f7b2749d7971811a2Hal Finkel
1044abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth      if (IID && TTI) {
1045a77728415857196035c0090f7b2749d7971811a2Hal Finkel        SmallVector<Type*, 4> Tys;
1046a77728415857196035c0090f7b2749d7971811a2Hal Finkel        for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
1047a77728415857196035c0090f7b2749d7971811a2Hal Finkel          Tys.push_back(CI->getArgOperand(i)->getType());
1048abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth        unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys);
1049a77728415857196035c0090f7b2749d7971811a2Hal Finkel
1050a77728415857196035c0090f7b2749d7971811a2Hal Finkel        Tys.clear();
1051a77728415857196035c0090f7b2749d7971811a2Hal Finkel        CallInst *CJ = cast<CallInst>(J);
1052a77728415857196035c0090f7b2749d7971811a2Hal Finkel        for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
1053a77728415857196035c0090f7b2749d7971811a2Hal Finkel          Tys.push_back(CJ->getArgOperand(i)->getType());
1054abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth        unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys);
1055a77728415857196035c0090f7b2749d7971811a2Hal Finkel
1056a77728415857196035c0090f7b2749d7971811a2Hal Finkel        Tys.clear();
1057a77728415857196035c0090f7b2749d7971811a2Hal Finkel        assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
1058a77728415857196035c0090f7b2749d7971811a2Hal Finkel               "Intrinsic argument counts differ");
1059a77728415857196035c0090f7b2749d7971811a2Hal Finkel        for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
1060a77728415857196035c0090f7b2749d7971811a2Hal Finkel          if (IID == Intrinsic::powi && i == 1)
1061a77728415857196035c0090f7b2749d7971811a2Hal Finkel            Tys.push_back(CI->getArgOperand(i)->getType());
1062a77728415857196035c0090f7b2749d7971811a2Hal Finkel          else
1063a77728415857196035c0090f7b2749d7971811a2Hal Finkel            Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
1064a77728415857196035c0090f7b2749d7971811a2Hal Finkel                                            CJ->getArgOperand(i)->getType()));
1065a77728415857196035c0090f7b2749d7971811a2Hal Finkel        }
1066a77728415857196035c0090f7b2749d7971811a2Hal Finkel
1067a77728415857196035c0090f7b2749d7971811a2Hal Finkel        Type *RetTy = getVecTypeForPair(IT1, JT1);
1068abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth        unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys);
1069a77728415857196035c0090f7b2749d7971811a2Hal Finkel
1070a77728415857196035c0090f7b2749d7971811a2Hal Finkel        if (VCost > ICost + JCost)
1071a77728415857196035c0090f7b2749d7971811a2Hal Finkel          return false;
1072a77728415857196035c0090f7b2749d7971811a2Hal Finkel
1073a77728415857196035c0090f7b2749d7971811a2Hal Finkel        // We don't want to fuse to a type that will be split, even
1074a77728415857196035c0090f7b2749d7971811a2Hal Finkel        // if the two input types will also be split and there is no other
1075a77728415857196035c0090f7b2749d7971811a2Hal Finkel        // associated cost.
1076abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth        unsigned RetParts = TTI->getNumberOfParts(RetTy);
1077a77728415857196035c0090f7b2749d7971811a2Hal Finkel        if (RetParts > 1)
1078a77728415857196035c0090f7b2749d7971811a2Hal Finkel          return false;
1079a77728415857196035c0090f7b2749d7971811a2Hal Finkel        else if (!RetParts && VCost == ICost + JCost)
1080a77728415857196035c0090f7b2749d7971811a2Hal Finkel          return false;
1081a77728415857196035c0090f7b2749d7971811a2Hal Finkel
1082a77728415857196035c0090f7b2749d7971811a2Hal Finkel        for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
1083a77728415857196035c0090f7b2749d7971811a2Hal Finkel          if (!Tys[i]->isVectorTy())
1084a77728415857196035c0090f7b2749d7971811a2Hal Finkel            continue;
1085a77728415857196035c0090f7b2749d7971811a2Hal Finkel
1086abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth          unsigned NumParts = TTI->getNumberOfParts(Tys[i]);
1087a77728415857196035c0090f7b2749d7971811a2Hal Finkel          if (NumParts > 1)
1088a77728415857196035c0090f7b2749d7971811a2Hal Finkel            return false;
1089a77728415857196035c0090f7b2749d7971811a2Hal Finkel          else if (!NumParts && VCost == ICost + JCost)
1090a77728415857196035c0090f7b2749d7971811a2Hal Finkel            return false;
1091a77728415857196035c0090f7b2749d7971811a2Hal Finkel        }
1092a77728415857196035c0090f7b2749d7971811a2Hal Finkel
1093a77728415857196035c0090f7b2749d7971811a2Hal Finkel        CostSavings = ICost + JCost - VCost;
1094a77728415857196035c0090f7b2749d7971811a2Hal Finkel      }
10956173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel    }
10966173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel
1097de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    return true;
1098de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
1099de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1100de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // Figure out whether or not J uses I and update the users and write-set
1101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // structures associated with I. Specifically, Users represents the set of
1102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // instructions that depend on I. WriteSet represents the set
1103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // of memory locations that are dependent on I. If UpdateUsers is true,
1104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // and J uses I, then Users is updated to contain J and WriteSet is updated
1105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // to contain any memory locations to which J writes. The function returns
1106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // true if J uses I. By default, alias analysis is used to determine
1107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // whether J reads from memory that overlaps with a location in WriteSet.
1108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // If LoadMoveSet is not null, then it is a previously-computed multimap
1109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // where the key is the memory-based user instruction and the value is
1110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // the instruction to be compared with I. So, if LoadMoveSet is provided,
1111de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // then the alias analysis is not used. This is necessary because this
1112de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // function is called during the process of moving instructions during
1113de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // vectorization and the results of the alias analysis are not stable during
1114de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // that process.
1115de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users,
1116de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       AliasSetTracker &WriteSet, Instruction *I,
1117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       Instruction *J, bool UpdateUsers,
11182f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                       DenseSet<ValuePair> *LoadMoveSetPairs) {
1119de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    bool UsesI = false;
1120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // This instruction may already be marked as a user due, for example, to
1122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // being a member of a selected pair.
1123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    if (Users.count(J))
1124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      UsesI = true;
1125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1126de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    if (!UsesI)
11277e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel      for (User::op_iterator JU = J->op_begin(), JE = J->op_end();
11287e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel           JU != JE; ++JU) {
1129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        Value *V = *JU;
1130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        if (I == V || Users.count(V)) {
1131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          UsesI = true;
1132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          break;
1133de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
1134de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
1135de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    if (!UsesI && J->mayReadFromMemory()) {
11362f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel      if (LoadMoveSetPairs) {
11372f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel        UsesI = LoadMoveSetPairs->count(ValuePair(J, I));
1138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      } else {
1139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        for (AliasSetTracker::iterator W = WriteSet.begin(),
1140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             WE = WriteSet.end(); W != WE; ++W) {
114138a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel          if (W->aliasesUnknownInst(J, *AA)) {
114238a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel            UsesI = true;
114338a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel            break;
1144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          }
1145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
1146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
1147de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
1148de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1149de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    if (UsesI && UpdateUsers) {
1150de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (J->mayWriteToMemory()) WriteSet.add(J);
1151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      Users.insert(J);
1152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
1153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    return UsesI;
1155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
1156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // This function iterates over all instruction pairs in the provided
1158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // basic block and collects all candidate pairs for vectorization.
11595d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel  bool BBVectorize::getCandidatePairs(BasicBlock &BB,
11605d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel                       BasicBlock::iterator &Start,
1161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       std::multimap<Value *, Value *> &CandidatePairs,
1162a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel                       DenseSet<ValuePair> &FixedOrderPairs,
116365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                       DenseMap<ValuePair, int> &CandidatePairCostSavings,
116464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                       std::vector<Value *> &PairableInsts, bool NonPow2Len) {
1165de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    BasicBlock::iterator E = BB.end();
11665d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    if (Start == E) return false;
11675d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
11685d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    bool ShouldContinue = false, IAfterStart = false;
11695d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    for (BasicBlock::iterator I = Start++; I != E; ++I) {
11705d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      if (I == Start) IAfterStart = true;
11715d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
1172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      bool IsSimpleLoadStore;
1173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (!isInstVectorizable(I, IsSimpleLoadStore)) continue;
1174de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1175de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Look for an instruction with which to pair instruction *I...
1176de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DenseSet<Value *> Users;
1177de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      AliasSetTracker WriteSet(*AA);
11785d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      bool JAfterStart = IAfterStart;
11795d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      BasicBlock::iterator J = llvm::next(I);
1180bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng      for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
11815d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel        if (J == Start) JAfterStart = true;
11825d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
1183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // Determine if J uses I, if so, exit the loop.
1184bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng        bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
1185bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng        if (Config.FastDep) {
1186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          // Note: For this heuristic to be effective, independent operations
1187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          // must tend to be intermixed. This is likely to be true from some
1188de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          // kinds of grouped loop unrolling (but not the generic LLVM pass),
1189de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          // but otherwise may require some kind of reordering pass.
1190de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1191de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          // When using fast dependency analysis,
1192de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          // stop searching after first use:
1193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          if (UsesI) break;
1194de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        } else {
1195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          if (UsesI) continue;
1196de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
1197de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // J does not use I, and comes before the first use of I, so it can be
1199de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // merged with I if the instructions are compatible.
1200a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel        int CostSavings, FixedOrder;
120165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len,
1202a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel            CostSavings, FixedOrder)) continue;
1203de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1204de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // J is a candidate for merging with I.
1205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        if (!PairableInsts.size() ||
1206de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             PairableInsts[PairableInsts.size()-1] != I) {
1207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          PairableInsts.push_back(I);
1208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
12095d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
1210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        CandidatePairs.insert(ValuePair(I, J));
1211abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth        if (TTI)
121265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel          CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J),
121365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                                                            CostSavings));
12145d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
1215a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel        if (FixedOrder == 1)
1216a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel          FixedOrderPairs.insert(ValuePair(I, J));
1217a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel        else if (FixedOrder == -1)
1218a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel          FixedOrderPairs.insert(ValuePair(J, I));
1219a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel
12205d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel        // The next call to this function must start after the last instruction
12215d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel        // selected during this invocation.
12225d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel        if (JAfterStart) {
12235d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel          Start = llvm::next(J);
12245d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel          IAfterStart = JAfterStart = false;
12255d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel        }
12265d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
1227de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair "
122865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                     << *I << " <-> " << *J << " (cost savings: " <<
122965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                     CostSavings << ")\n");
12305d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
12315d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel        // If we have already found too many pairs, break here and this function
12325d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel        // will be called again starting after the last instruction selected
12335d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel        // during this invocation.
1234bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng        if (PairableInsts.size() >= Config.MaxInsts) {
12355d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel          ShouldContinue = true;
12365d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel          break;
12375d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel        }
1238de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
12395d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
12405d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel      if (ShouldContinue)
12415d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel        break;
1242de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
1243de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1244de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DEBUG(dbgs() << "BBV: found " << PairableInsts.size()
1245de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           << " instructions with candidate pairs\n");
12465d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel
12475d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel    return ShouldContinue;
1248de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
1249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1250de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // Finds candidate pairs connected to the pair P = <PI, PJ>. This means that
1251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // it looks for pairs such that both members have an input which is an
1252de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // output of PI or PJ.
1253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::computePairsConnectedTo(
1254de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<Value *, Value *> &CandidatePairs,
125500f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel                      DenseSet<ValuePair> &CandidatePairsSet,
1256de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::vector<Value *> &PairableInsts,
1257de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairs,
125872465ea23d010507d3746adc126d719005981e05Hal Finkel                      DenseMap<VPPair, unsigned> &PairConnectionTypes,
1259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      ValuePair P) {
1260bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel    StoreInst *SI, *SJ;
1261bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel
1262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // For each possible pairing for this variable, look at the uses of
1263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // the first value...
1264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (Value::use_iterator I = P.first->use_begin(),
1265de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel         E = P.first->use_end(); I != E; ++I) {
1266bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel      if (isa<LoadInst>(*I)) {
1267bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel        // A pair cannot be connected to a load because the load only takes one
1268bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel        // operand (the address) and it is a scalar even after vectorization.
1269bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel        continue;
1270bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel      } else if ((SI = dyn_cast<StoreInst>(*I)) &&
1271bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel                 P.first == SI->getPointerOperand()) {
1272bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel        // Similarly, a pair cannot be connected to a store through its
1273bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel        // pointer operand.
1274bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel        continue;
1275bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel      }
1276bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel
1277de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // For each use of the first variable, look for uses of the second
1278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // variable...
1279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      for (Value::use_iterator J = P.second->use_begin(),
1280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           E2 = P.second->use_end(); J != E2; ++J) {
1281bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel        if ((SJ = dyn_cast<StoreInst>(*J)) &&
1282bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel            P.second == SJ->getPointerOperand())
1283bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel          continue;
1284bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel
1285de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // Look for <I, J>:
128600f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel        if (CandidatePairsSet.count(ValuePair(*I, *J))) {
128772465ea23d010507d3746adc126d719005981e05Hal Finkel          VPPair VP(P, ValuePair(*I, *J));
128872465ea23d010507d3746adc126d719005981e05Hal Finkel          ConnectedPairs.insert(VP);
128972465ea23d010507d3746adc126d719005981e05Hal Finkel          PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect));
129072465ea23d010507d3746adc126d719005981e05Hal Finkel        }
1291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1292de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // Look for <J, I>:
129300f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel        if (CandidatePairsSet.count(ValuePair(*J, *I))) {
129472465ea23d010507d3746adc126d719005981e05Hal Finkel          VPPair VP(P, ValuePair(*J, *I));
129572465ea23d010507d3746adc126d719005981e05Hal Finkel          ConnectedPairs.insert(VP);
129672465ea23d010507d3746adc126d719005981e05Hal Finkel          PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap));
129772465ea23d010507d3746adc126d719005981e05Hal Finkel        }
1298de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
1299de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1300bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng      if (Config.SplatBreaksChain) continue;
1301de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Look for cases where just the first value in the pair is used by
1302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // both members of another pair (splatting).
1303de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
1304bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel        if ((SJ = dyn_cast<StoreInst>(*J)) &&
1305bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel            P.first == SJ->getPointerOperand())
1306bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel          continue;
1307bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel
130800f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel        if (CandidatePairsSet.count(ValuePair(*I, *J))) {
130972465ea23d010507d3746adc126d719005981e05Hal Finkel          VPPair VP(P, ValuePair(*I, *J));
131072465ea23d010507d3746adc126d719005981e05Hal Finkel          ConnectedPairs.insert(VP);
131172465ea23d010507d3746adc126d719005981e05Hal Finkel          PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
131272465ea23d010507d3746adc126d719005981e05Hal Finkel        }
1313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
1314de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
1315de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1316bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng    if (Config.SplatBreaksChain) return;
1317de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Look for cases where just the second value in the pair is used by
1318de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // both members of another pair (splatting).
1319de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (Value::use_iterator I = P.second->use_begin(),
1320de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel         E = P.second->use_end(); I != E; ++I) {
1321bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel      if (isa<LoadInst>(*I))
1322bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel        continue;
1323bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel      else if ((SI = dyn_cast<StoreInst>(*I)) &&
1324bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel               P.second == SI->getPointerOperand())
1325bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel        continue;
1326bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel
1327de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) {
1328bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel        if ((SJ = dyn_cast<StoreInst>(*J)) &&
1329bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel            P.second == SJ->getPointerOperand())
1330bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel          continue;
1331bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel
133200f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel        if (CandidatePairsSet.count(ValuePair(*I, *J))) {
133372465ea23d010507d3746adc126d719005981e05Hal Finkel          VPPair VP(P, ValuePair(*I, *J));
133472465ea23d010507d3746adc126d719005981e05Hal Finkel          ConnectedPairs.insert(VP);
133572465ea23d010507d3746adc126d719005981e05Hal Finkel          PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
133672465ea23d010507d3746adc126d719005981e05Hal Finkel        }
1337de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
1338de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
1339de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
1340de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1341de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // This function figures out which pairs are connected.  Two pairs are
1342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // connected if some output of the first pair forms an input to both members
1343de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // of the second pair.
1344de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::computeConnectedPairs(
1345de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<Value *, Value *> &CandidatePairs,
1346b1a82589339fed148c12b052d30861a539552f1aHal Finkel                      DenseSet<ValuePair> &CandidatePairsSet,
1347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::vector<Value *> &PairableInsts,
134872465ea23d010507d3746adc126d719005981e05Hal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairs,
134972465ea23d010507d3746adc126d719005981e05Hal Finkel                      DenseMap<VPPair, unsigned> &PairConnectionTypes) {
1350de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
1351de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel         PE = PairableInsts.end(); PI != PE; ++PI) {
1352de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI);
1353de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1354de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      for (std::multimap<Value *, Value *>::iterator P = choiceRange.first;
1355de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           P != choiceRange.second; ++P)
1356b1a82589339fed148c12b052d30861a539552f1aHal Finkel        computePairsConnectedTo(CandidatePairs, CandidatePairsSet,
1357b1a82589339fed148c12b052d30861a539552f1aHal Finkel                                PairableInsts, ConnectedPairs,
1358b1a82589339fed148c12b052d30861a539552f1aHal Finkel                                PairConnectionTypes, *P);
1359de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
1360de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size()
1362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                 << " pair connections.\n");
1363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
1364de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1365de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // This function builds a set of use tuples such that <A, B> is in the set
1366de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // if B is in the use tree of A. If B is in the use tree of A, then B
1367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // depends on the output of A.
1368de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::buildDepMap(
1369de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      BasicBlock &BB,
1370de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<Value *, Value *> &CandidatePairs,
1371de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::vector<Value *> &PairableInsts,
1372de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &PairableInstUsers) {
1373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DenseSet<Value *> IsInPair;
1374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(),
1375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel         E = CandidatePairs.end(); C != E; ++C) {
1376de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      IsInPair.insert(C->first);
1377de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      IsInPair.insert(C->second);
1378de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
1379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
13807a8aba906416b6998347b52c3c08610fdc190638Hal Finkel    // Iterate through the basic block, recording all users of each
1381de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // pairable instruction.
1382de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1383de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    BasicBlock::iterator E = BB.end();
1384de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
1385de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (IsInPair.find(I) == IsInPair.end()) continue;
1386de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DenseSet<Value *> Users;
1388de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      AliasSetTracker WriteSet(*AA);
1389de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      for (BasicBlock::iterator J = llvm::next(I); J != E; ++J)
1390de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        (void) trackUsesOfI(Users, WriteSet, I, J);
1391de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1392de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end();
13938f3359a4b396d3f1a7b2726e02f199be74c62e4cHal Finkel           U != E; ++U) {
13948f3359a4b396d3f1a7b2726e02f199be74c62e4cHal Finkel        if (IsInPair.find(*U) == IsInPair.end()) continue;
1395de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        PairableInstUsers.insert(ValuePair(I, *U));
13968f3359a4b396d3f1a7b2726e02f199be74c62e4cHal Finkel      }
1397de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
1398de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
1399de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // Returns true if an input to pair P is an output of pair Q and also an
1401de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // input of pair Q is an output of pair P. If this is the case, then these
1402de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // two pairs cannot be simultaneously fused.
1403de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
1404de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     DenseSet<ValuePair> &PairableInstUsers,
1405da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                     std::multimap<ValuePair, ValuePair> *PairableInstUserMap,
1406da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                     DenseSet<VPPair> *PairableInstUserPairSet) {
1407de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Two pairs are in conflict if they are mutual Users of eachother.
1408de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    bool QUsesP = PairableInstUsers.count(ValuePair(P.first,  Q.first))  ||
1409de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                  PairableInstUsers.count(ValuePair(P.first,  Q.second)) ||
1410de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                  PairableInstUsers.count(ValuePair(P.second, Q.first))  ||
1411de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                  PairableInstUsers.count(ValuePair(P.second, Q.second));
1412de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first,  P.first))  ||
1413de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                  PairableInstUsers.count(ValuePair(Q.first,  P.second)) ||
1414de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                  PairableInstUsers.count(ValuePair(Q.second, P.first))  ||
1415de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                  PairableInstUsers.count(ValuePair(Q.second, P.second));
1416de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    if (PairableInstUserMap) {
1417de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // FIXME: The expensive part of the cycle check is not so much the cycle
1418de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // check itself but this edge insertion procedure. This needs some
1419de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // profiling and probably a different data structure (same is true of
1420de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // most uses of std::multimap).
1421de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (PUsesQ) {
1422da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel        if (PairableInstUserPairSet->insert(VPPair(Q, P)).second)
1423de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          PairableInstUserMap->insert(VPPair(Q, P));
1424de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
1425de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (QUsesP) {
1426da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel        if (PairableInstUserPairSet->insert(VPPair(P, Q)).second)
1427de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          PairableInstUserMap->insert(VPPair(P, Q));
1428de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
1429de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
1430de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1431de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    return (QUsesP && PUsesQ);
1432de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
1433de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1434de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // This function walks the use graph of current pairs to see if, starting
1435de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // from P, the walk returns to P.
1436de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  bool BBVectorize::pairWillFormCycle(ValuePair P,
1437de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
1438de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                       DenseSet<ValuePair> &CurrentPairs) {
1439de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DEBUG(if (DebugCycleCheck)
1440de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> "
1441de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                   << *P.second << "\n");
1442de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // A lookup table of visisted pairs is kept because the PairableInstUserMap
1443de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // contains non-direct associations.
1444de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DenseSet<ValuePair> Visited;
144535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel    SmallVector<ValuePair, 32> Q;
1446de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // General depth-first post-order traversal:
1447de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Q.push_back(P);
144835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel    do {
144935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel      ValuePair QTop = Q.pop_back_val();
1450de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      Visited.insert(QTop);
1451de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1452de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DEBUG(if (DebugCycleCheck)
1453de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> "
1454de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     << *QTop.second << "\n");
1455de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop);
1456de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first;
1457de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           C != QPairRange.second; ++C) {
1458de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        if (C->second == P) {
1459de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          DEBUG(dbgs()
1460de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                 << "BBV: rejected to prevent non-trivial cycle formation: "
1461de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                 << *C->first.first << " <-> " << *C->first.second << "\n");
1462de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          return true;
1463de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
1464de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
14650b2500c504156c45cd71817a9ef6749b6cde5703David Blaikie        if (CurrentPairs.count(C->second) && !Visited.count(C->second))
1466de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          Q.push_back(C->second);
1467de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
146835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel    } while (!Q.empty());
1469de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1470de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    return false;
1471de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
1472de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1473de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // This function builds the initial tree of connected pairs with the
1474de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // pair J at the root.
1475de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::buildInitialTreeFor(
1476de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<Value *, Value *> &CandidatePairs,
1477b1a82589339fed148c12b052d30861a539552f1aHal Finkel                      DenseSet<ValuePair> &CandidatePairsSet,
1478de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::vector<Value *> &PairableInsts,
1479de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairs,
1480de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &PairableInstUsers,
1481de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseMap<Value *, Value *> &ChosenPairs,
1482de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseMap<ValuePair, size_t> &Tree, ValuePair J) {
1483de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Each of these pairs is viewed as the root node of a Tree. The Tree
1484de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // is then walked (depth-first). As this happens, we keep track of
1485de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // the pairs that compose the Tree and the maximum depth of the Tree.
148635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel    SmallVector<ValuePairWithDepth, 32> Q;
1487de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // General depth-first post-order traversal:
1488de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
148935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel    do {
1490de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      ValuePairWithDepth QTop = Q.back();
1491de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1492de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Push each child onto the queue:
1493de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      bool MoreChildren = false;
1494de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      size_t MaxChildDepth = QTop.second;
1495de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first);
1496478eed85f96f0d93da43e26cfb7fc6dee981c9aaNAKAMURA Takumi      for (std::multimap<ValuePair, ValuePair>::iterator k = qtRange.first;
1497de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           k != qtRange.second; ++k) {
1498de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // Make sure that this child pair is still a candidate:
1499b1a82589339fed148c12b052d30861a539552f1aHal Finkel        if (CandidatePairsSet.count(ValuePair(k->second))) {
1500de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          DenseMap<ValuePair, size_t>::iterator C = Tree.find(k->second);
1501de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          if (C == Tree.end()) {
1502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            size_t d = getDepthFactor(k->second.first);
1503de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            Q.push_back(ValuePairWithDepth(k->second, QTop.second+d));
1504de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            MoreChildren = true;
1505de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          } else {
1506de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            MaxChildDepth = std::max(MaxChildDepth, C->second);
1507de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          }
1508de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
1509de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
1510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1511de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (!MoreChildren) {
1512de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // Record the current pair as part of the Tree:
1513de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
1514de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        Q.pop_back();
1515de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
151635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel    } while (!Q.empty());
1517de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
1518de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1519de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // Given some initial tree, prune it by removing conflicting pairs (pairs
1520de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // that cannot be simultaneously chosen for vectorization).
1521de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::pruneTreeFor(
1522de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<Value *, Value *> &CandidatePairs,
1523de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::vector<Value *> &PairableInsts,
1524de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairs,
1525de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &PairableInstUsers,
1526de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
1527da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                      DenseSet<VPPair> &PairableInstUserPairSet,
1528de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseMap<Value *, Value *> &ChosenPairs,
1529de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseMap<ValuePair, size_t> &Tree,
1530de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &PrunedTree, ValuePair J,
1531de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      bool UseCycleCheck) {
153235564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel    SmallVector<ValuePairWithDepth, 32> Q;
1533de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // General depth-first post-order traversal:
1534de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
153535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel    do {
153635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel      ValuePairWithDepth QTop = Q.pop_back_val();
1537de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      PrunedTree.insert(QTop.first);
1538de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1539de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Visit each child, pruning as necessary...
154097d19ebe5b5bf27617e536a16fa232116cefe914Hal Finkel      SmallVector<ValuePairWithDepth, 8> BestChildren;
1541de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first);
1542478eed85f96f0d93da43e26cfb7fc6dee981c9aaNAKAMURA Takumi      for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first;
1543de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           K != QTopRange.second; ++K) {
1544de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        DenseMap<ValuePair, size_t>::iterator C = Tree.find(K->second);
1545de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        if (C == Tree.end()) continue;
1546de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1547de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // This child is in the Tree, now we need to make sure it is the
1548de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // best of any conflicting children. There could be multiple
1549de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // conflicting children, so first, determine if we're keeping
1550de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // this child, then delete conflicting children as necessary.
1551de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1552de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // It is also necessary to guard against pairing-induced
1553de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // dependencies. Consider instructions a .. x .. y .. b
1554de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // such that (a,b) are to be fused and (x,y) are to be fused
1555de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // but a is an input to x and b is an output from y. This
1556de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // means that y cannot be moved after b but x must be moved
1557de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // after b for (a,b) to be fused. In other words, after
1558de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // fusing (a,b) we have y .. a/b .. x where y is an input
1559de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // to a/b and x is an output to a/b: x and y can no longer
1560de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // be legally fused. To prevent this condition, we must
1561de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // make sure that a child pair added to the Tree is not
1562de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // both an input and output of an already-selected pair.
1563de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1564de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // Pairing-induced dependencies can also form from more complicated
1565de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // cycles. The pair vs. pair conflicts are easy to check, and so
1566de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // that is done explicitly for "fast rejection", and because for
1567de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // child vs. child conflicts, we may prefer to keep the current
1568de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // pair in preference to the already-selected child.
1569de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        DenseSet<ValuePair> CurrentPairs;
1570de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1571de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        bool CanAdd = true;
157297d19ebe5b5bf27617e536a16fa232116cefe914Hal Finkel        for (SmallVector<ValuePairWithDepth, 8>::iterator C2
157343ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop              = BestChildren.begin(), E2 = BestChildren.end();
1574de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             C2 != E2; ++C2) {
1575de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          if (C2->first.first == C->first.first ||
1576de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              C2->first.first == C->first.second ||
1577de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              C2->first.second == C->first.first ||
1578de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              C2->first.second == C->first.second ||
1579de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              pairsConflict(C2->first, C->first, PairableInstUsers,
1580da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                            UseCycleCheck ? &PairableInstUserMap : 0,
1581da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                            UseCycleCheck ? &PairableInstUserPairSet : 0)) {
1582de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            if (C2->second >= C->second) {
1583de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              CanAdd = false;
1584de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              break;
1585de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            }
1586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1587de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            CurrentPairs.insert(C2->first);
1588de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          }
1589de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
1590de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        if (!CanAdd) continue;
1591de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1592de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // Even worse, this child could conflict with another node already
1593de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // selected for the Tree. If that is the case, ignore this child.
1594de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        for (DenseSet<ValuePair>::iterator T = PrunedTree.begin(),
1595de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             E2 = PrunedTree.end(); T != E2; ++T) {
1596de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          if (T->first == C->first.first ||
1597de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              T->first == C->first.second ||
1598de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              T->second == C->first.first ||
1599de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              T->second == C->first.second ||
1600de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              pairsConflict(*T, C->first, PairableInstUsers,
1601da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                            UseCycleCheck ? &PairableInstUserMap : 0,
1602da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                            UseCycleCheck ? &PairableInstUserPairSet : 0)) {
1603de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            CanAdd = false;
1604de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            break;
1605de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          }
1606de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1607de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          CurrentPairs.insert(*T);
1608de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
1609de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        if (!CanAdd) continue;
1610de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1611de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // And check the queue too...
161235564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel        for (SmallVector<ValuePairWithDepth, 32>::iterator C2 = Q.begin(),
1613de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             E2 = Q.end(); C2 != E2; ++C2) {
1614de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          if (C2->first.first == C->first.first ||
1615de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              C2->first.first == C->first.second ||
1616de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              C2->first.second == C->first.first ||
1617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              C2->first.second == C->first.second ||
1618de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              pairsConflict(C2->first, C->first, PairableInstUsers,
1619da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                            UseCycleCheck ? &PairableInstUserMap : 0,
1620da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                            UseCycleCheck ? &PairableInstUserPairSet : 0)) {
1621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            CanAdd = false;
1622de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            break;
1623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          }
1624de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1625de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          CurrentPairs.insert(C2->first);
1626de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
1627de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        if (!CanAdd) continue;
1628de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1629de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // Last but not least, check for a conflict with any of the
1630de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // already-chosen pairs.
1631de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        for (DenseMap<Value *, Value *>::iterator C2 =
1632de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              ChosenPairs.begin(), E2 = ChosenPairs.end();
1633de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             C2 != E2; ++C2) {
1634de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          if (pairsConflict(*C2, C->first, PairableInstUsers,
1635da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                            UseCycleCheck ? &PairableInstUserMap : 0,
1636da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                            UseCycleCheck ? &PairableInstUserPairSet : 0)) {
1637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            CanAdd = false;
1638de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            break;
1639de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          }
1640de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1641de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          CurrentPairs.insert(*C2);
1642de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
1643de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        if (!CanAdd) continue;
1644de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
16451230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop        // To check for non-trivial cycles formed by the addition of the
16461230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop        // current pair we've formed a list of all relevant pairs, now use a
16471230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop        // graph walk to check for a cycle. We start from the current pair and
16481230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop        // walk the use tree to see if we again reach the current pair. If we
16491230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop        // do, then the current pair is rejected.
1650de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1651de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // FIXME: It may be more efficient to use a topological-ordering
1652de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // algorithm to improve the cycle check. This should be investigated.
1653de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        if (UseCycleCheck &&
1654de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs))
1655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          continue;
1656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // This child can be added, but we may have chosen it in preference
1658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // to an already-selected child. Check for this here, and if a
1659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // conflict is found, then remove the previously-selected child
1660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // before adding this one in its place.
166197d19ebe5b5bf27617e536a16fa232116cefe914Hal Finkel        for (SmallVector<ValuePairWithDepth, 8>::iterator C2
166243ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop              = BestChildren.begin(); C2 != BestChildren.end();) {
1663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          if (C2->first.first == C->first.first ||
1664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              C2->first.first == C->first.second ||
1665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              C2->first.second == C->first.first ||
1666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              C2->first.second == C->first.second ||
1667de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              pairsConflict(C2->first, C->first, PairableInstUsers))
1668d7a3425f06d51ed579bd9aefeb835b7fa4ce7849Hal Finkel            C2 = BestChildren.erase(C2);
1669de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          else
1670de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            ++C2;
1671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
1672de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1673d7a3425f06d51ed579bd9aefeb835b7fa4ce7849Hal Finkel        BestChildren.push_back(ValuePairWithDepth(C->first, C->second));
1674de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
1675de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
167697d19ebe5b5bf27617e536a16fa232116cefe914Hal Finkel      for (SmallVector<ValuePairWithDepth, 8>::iterator C
167743ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop            = BestChildren.begin(), E2 = BestChildren.end();
1678de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           C != E2; ++C) {
1679de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        size_t DepthF = getDepthFactor(C->first.first);
1680de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        Q.push_back(ValuePairWithDepth(C->first, QTop.second+DepthF));
1681de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
168235564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel    } while (!Q.empty());
1683de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
1684de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1685de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // This function finds the best tree of mutually-compatible connected
1686de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // pairs, given the choice of root pairs as an iterator range.
1687de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::findBestTreeFor(
1688de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<Value *, Value *> &CandidatePairs,
1689b1a82589339fed148c12b052d30861a539552f1aHal Finkel                      DenseSet<ValuePair> &CandidatePairsSet,
169065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                      DenseMap<ValuePair, int> &CandidatePairCostSavings,
1691de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::vector<Value *> &PairableInsts,
169286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                      DenseSet<ValuePair> &FixedOrderPairs,
169386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                      DenseMap<VPPair, unsigned> &PairConnectionTypes,
1694de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairs,
169586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
1696de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &PairableInstUsers,
1697de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
1698da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                      DenseSet<VPPair> &PairableInstUserPairSet,
1699de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseMap<Value *, Value *> &ChosenPairs,
1700de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
170165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                      int &BestEffSize, VPIteratorPair ChoiceRange,
1702de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      bool UseCycleCheck) {
1703de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first;
1704de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel         J != ChoiceRange.second; ++J) {
1705de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1706de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Before going any further, make sure that this pair does not
1707de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // conflict with any already-selected pairs (see comment below
1708de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // near the Tree pruning for more details).
1709de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DenseSet<ValuePair> ChosenPairSet;
1710de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      bool DoesConflict = false;
1711de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(),
1712de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           E = ChosenPairs.end(); C != E; ++C) {
1713de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        if (pairsConflict(*C, *J, PairableInstUsers,
1714da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                          UseCycleCheck ? &PairableInstUserMap : 0,
1715da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                          UseCycleCheck ? &PairableInstUserPairSet : 0)) {
1716de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          DoesConflict = true;
1717de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          break;
1718de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
1719de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1720de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        ChosenPairSet.insert(*C);
1721de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
1722de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (DoesConflict) continue;
1723de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1724de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (UseCycleCheck &&
1725de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          pairWillFormCycle(*J, PairableInstUserMap, ChosenPairSet))
1726de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        continue;
1727de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1728de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DenseMap<ValuePair, size_t> Tree;
1729b1a82589339fed148c12b052d30861a539552f1aHal Finkel      buildInitialTreeFor(CandidatePairs, CandidatePairsSet,
1730b1a82589339fed148c12b052d30861a539552f1aHal Finkel                          PairableInsts, ConnectedPairs,
1731de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                          PairableInstUsers, ChosenPairs, Tree, *J);
1732de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1733de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Because we'll keep the child with the largest depth, the largest
1734de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // depth is still the same in the unpruned Tree.
1735de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      size_t MaxDepth = Tree.lookup(*J);
1736de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1737de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {"
1738de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                   << *J->first << " <-> " << *J->second << "} of depth " <<
1739de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                   MaxDepth << " and size " << Tree.size() << "\n");
1740de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1741de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // At this point the Tree has been constructed, but, may contain
1742de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // contradictory children (meaning that different children of
1743de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // some tree node may be attempting to fuse the same instruction).
1744de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // So now we walk the tree again, in the case of a conflict,
1745de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // keep only the child with the largest depth. To break a tie,
1746de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // favor the first child.
1747de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
1748de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DenseSet<ValuePair> PrunedTree;
1749de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
1750b1a82589339fed148c12b052d30861a539552f1aHal Finkel                   PairableInstUsers, PairableInstUserMap,
1751b1a82589339fed148c12b052d30861a539552f1aHal Finkel                   PairableInstUserPairSet,
1752da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                   ChosenPairs, Tree, PrunedTree, *J, UseCycleCheck);
1753de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
175465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      int EffSize = 0;
1755abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth      if (TTI) {
175678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel        DenseSet<Value *> PrunedTreeInstrs;
175778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel        for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
175878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel             E = PrunedTree.end(); S != E; ++S) {
175978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          PrunedTreeInstrs.insert(S->first);
176078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          PrunedTreeInstrs.insert(S->second);
176178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel        }
176278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
176378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel        // The set of pairs that have already contributed to the total cost.
176478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel        DenseSet<ValuePair> IncomingPairs;
176578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
17664387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel        // If the cost model were perfect, this might not be necessary; but we
17674387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel        // need to make sure that we don't get stuck vectorizing our own
17684387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel        // shuffle chains.
17694387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel        bool HasNontrivialInsts = false;
17704387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel
177186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel        // The node weights represent the cost savings associated with
177286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel        // fusing the pair of instructions.
177365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
177465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel             E = PrunedTree.end(); S != E; ++S) {
17754387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel          if (!isa<ShuffleVectorInst>(S->first) &&
17764387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel              !isa<InsertElementInst>(S->first) &&
17774387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel              !isa<ExtractElementInst>(S->first))
17784387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel            HasNontrivialInsts = true;
17794387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel
178078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          bool FlipOrder = false;
178178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
178278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          if (getDepthFactor(S->first)) {
178378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            int ESContrib = CandidatePairCostSavings.find(*S)->second;
178478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            DEBUG(if (DebugPairSelection) dbgs() << "\tweight {"
178578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                   << *S->first << " <-> " << *S->second << "} = " <<
178678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                   ESContrib << "\n");
178778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            EffSize += ESContrib;
178878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          }
178986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel
179078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          // The edge weights contribute in a negative sense: they represent
179178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          // the cost of shuffles.
179286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel          VPPIteratorPair IP = ConnectedPairDeps.equal_range(*S);
179386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel          if (IP.first != ConnectedPairDeps.end()) {
179486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel            unsigned NumDepsDirect = 0, NumDepsSwap = 0;
179586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel            for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
179686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                 Q != IP.second; ++Q) {
179778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              if (!PrunedTree.count(Q->second))
179878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                continue;
179986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel              DenseMap<VPPair, unsigned>::iterator R =
180086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                PairConnectionTypes.find(VPPair(Q->second, Q->first));
180186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel              assert(R != PairConnectionTypes.end() &&
180286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                     "Cannot find pair connection type");
180386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel              if (R->second == PairConnectionDirect)
180486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                ++NumDepsDirect;
180586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel              else if (R->second == PairConnectionSwap)
180686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                ++NumDepsSwap;
180786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel            }
180886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel
180986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel            // If there are more swaps than direct connections, then
181086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel            // the pair order will be flipped during fusion. So the real
181186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel            // number of swaps is the minimum number.
181278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            FlipOrder = !FixedOrderPairs.count(*S) &&
181386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel              ((NumDepsSwap > NumDepsDirect) ||
181486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                FixedOrderPairs.count(ValuePair(S->second, S->first)));
181586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel
181686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel            for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
181786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                 Q != IP.second; ++Q) {
181878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              if (!PrunedTree.count(Q->second))
181978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                continue;
182086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel              DenseMap<VPPair, unsigned>::iterator R =
182186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                PairConnectionTypes.find(VPPair(Q->second, Q->first));
182286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel              assert(R != PairConnectionTypes.end() &&
182386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                     "Cannot find pair connection type");
182486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel              Type *Ty1 = Q->second.first->getType(),
182586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                   *Ty2 = Q->second.second->getType();
182686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel              Type *VTy = getVecTypeForPair(Ty1, Ty2);
182786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel              if ((R->second == PairConnectionDirect && FlipOrder) ||
182886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                  (R->second == PairConnectionSwap && !FlipOrder)  ||
182978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                  R->second == PairConnectionSplat) {
183078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                int ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
183178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                                   VTy, VTy);
1832245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel
1833245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                if (VTy->getVectorNumElements() == 2) {
1834245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                  if (R->second == PairConnectionSplat)
1835245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                    ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
1836245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                      TargetTransformInfo::SK_Broadcast, VTy));
1837245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                  else
1838245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                    ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
1839245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                      TargetTransformInfo::SK_Reverse, VTy));
1840245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                }
1841245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel
184278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
184378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                  *Q->second.first << " <-> " << *Q->second.second <<
184478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                    "} -> {" <<
184578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                  *S->first << " <-> " << *S->second << "} = " <<
184678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                   ESContrib << "\n");
184778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                EffSize -= ESContrib;
184878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              }
184978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            }
185078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          }
185178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
185278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          // Compute the cost of outgoing edges. We assume that edges outgoing
185378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          // to shuffles, inserts or extracts can be merged, and so contribute
185478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          // no additional cost.
185578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          if (!S->first->getType()->isVoidTy()) {
185678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            Type *Ty1 = S->first->getType(),
185778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                 *Ty2 = S->second->getType();
185878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            Type *VTy = getVecTypeForPair(Ty1, Ty2);
185978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
186078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            bool NeedsExtraction = false;
186178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            for (Value::use_iterator I = S->first->use_begin(),
186278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                 IE = S->first->use_end(); I != IE; ++I) {
186386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel              if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
186486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                // Shuffle can be folded if it has no other input
186586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                if (isa<UndefValue>(SI->getOperand(1)))
186686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                  continue;
186786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel              }
186886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel              if (isa<ExtractElementInst>(*I))
186978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                continue;
187078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              if (PrunedTreeInstrs.count(*I))
187178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                continue;
187278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              NeedsExtraction = true;
187378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              break;
187478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            }
187578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
187678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            if (NeedsExtraction) {
187778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              int ESContrib;
1878245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel              if (Ty1->isVectorTy()) {
187978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
188078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                               Ty1, VTy);
1881245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
1882245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                  TargetTransformInfo::SK_ExtractSubvector, VTy, 0, Ty1));
1883245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel              } else
1884abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth                ESContrib = (int) TTI->getVectorInstrCost(
188578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                    Instruction::ExtractElement, VTy, 0);
188678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
188778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
188878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                *S->first << "} = " << ESContrib << "\n");
188978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              EffSize -= ESContrib;
189078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            }
189178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
189278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            NeedsExtraction = false;
189378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            for (Value::use_iterator I = S->second->use_begin(),
189478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                 IE = S->second->use_end(); I != IE; ++I) {
189586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel              if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
189686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                // Shuffle can be folded if it has no other input
189786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                if (isa<UndefValue>(SI->getOperand(1)))
189886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                  continue;
189986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel              }
190086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel              if (isa<ExtractElementInst>(*I))
190178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                continue;
190278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              if (PrunedTreeInstrs.count(*I))
190378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                continue;
190478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              NeedsExtraction = true;
190578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              break;
190678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            }
190778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
190878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            if (NeedsExtraction) {
190978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              int ESContrib;
1910245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel              if (Ty2->isVectorTy()) {
191178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
191278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                               Ty2, VTy);
1913245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
1914245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                  TargetTransformInfo::SK_ExtractSubvector, VTy,
1915245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                  Ty1->isVectorTy() ? Ty1->getVectorNumElements() : 1, Ty2));
1916245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel              } else
1917abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth                ESContrib = (int) TTI->getVectorInstrCost(
191878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                    Instruction::ExtractElement, VTy, 1);
191978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
192078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                *S->second << "} = " << ESContrib << "\n");
192178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              EffSize -= ESContrib;
192278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            }
192378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          }
192478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
192578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          // Compute the cost of incoming edges.
192678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel          if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) {
192778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            Instruction *S1 = cast<Instruction>(S->first),
192878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                        *S2 = cast<Instruction>(S->second);
192978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            for (unsigned o = 0; o < S1->getNumOperands(); ++o) {
193078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o);
193178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
193278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              // Combining constants into vector constants (or small vector
193378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              // constants into larger ones) is assumed free.
193478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              if (isa<Constant>(O1) && isa<Constant>(O2))
193578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                continue;
193678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
193778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              if (FlipOrder)
193878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                std::swap(O1, O2);
193978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
194078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              ValuePair VP  = ValuePair(O1, O2);
194178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              ValuePair VPR = ValuePair(O2, O1);
194278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
194378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              // Internal edges are not handled here.
194478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              if (PrunedTree.count(VP) || PrunedTree.count(VPR))
194578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                continue;
194678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
194778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              Type *Ty1 = O1->getType(),
194878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                   *Ty2 = O2->getType();
194978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              Type *VTy = getVecTypeForPair(Ty1, Ty2);
195078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
195178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              // Combining vector operations of the same type is also assumed
195278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              // folded with other operations.
195386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel              if (Ty1 == Ty2) {
195486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                // If both are insert elements, then both can be widened.
1955b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel                InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1),
1956b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel                                  *IEO2 = dyn_cast<InsertElementInst>(O2);
1957b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel                if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2))
195886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                  continue;
195986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                // If both are extract elements, and both have the same input
196086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                // type, then they can be replaced with a shuffle
196186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1),
196286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                                   *EIO2 = dyn_cast<ExtractElementInst>(O2);
196386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                if (EIO1 && EIO2 &&
196486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                    EIO1->getOperand(0)->getType() ==
196586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                      EIO2->getOperand(0)->getType())
196686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                  continue;
196786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                // If both are a shuffle with equal operand types and only two
196886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                // unique operands, then they can be replaced with a single
196986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                // shuffle
197086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1),
197186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                                  *SIO2 = dyn_cast<ShuffleVectorInst>(O2);
197286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                if (SIO1 && SIO2 &&
197386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                    SIO1->getOperand(0)->getType() ==
197486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                      SIO2->getOperand(0)->getType()) {
197586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                  SmallSet<Value *, 4> SIOps;
197686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                  SIOps.insert(SIO1->getOperand(0));
197786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                  SIOps.insert(SIO1->getOperand(1));
197886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                  SIOps.insert(SIO2->getOperand(0));
197986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                  SIOps.insert(SIO2->getOperand(1));
198086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                  if (SIOps.size() <= 2)
198186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                    continue;
198286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel                }
198386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel              }
198478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
198578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              int ESContrib;
198678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              // This pair has already been formed.
198778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              if (IncomingPairs.count(VP)) {
198878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                continue;
198978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              } else if (IncomingPairs.count(VPR)) {
199078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
199178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                               VTy, VTy);
1992245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel
1993245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                if (VTy->getVectorNumElements() == 2)
1994245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                  ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
1995245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel                    TargetTransformInfo::SK_Reverse, VTy));
199678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) {
1997abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth                ESContrib = (int) TTI->getVectorInstrCost(
199878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                    Instruction::InsertElement, VTy, 0);
1999abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth                ESContrib += (int) TTI->getVectorInstrCost(
200078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                     Instruction::InsertElement, VTy, 1);
200178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              } else if (!Ty1->isVectorTy()) {
200278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                // O1 needs to be inserted into a vector of size O2, and then
200378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                // both need to be shuffled together.
2004abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth                ESContrib = (int) TTI->getVectorInstrCost(
200578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                    Instruction::InsertElement, Ty2, 0);
200678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
200778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                                VTy, Ty2);
200878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              } else if (!Ty2->isVectorTy()) {
200978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                // O2 needs to be inserted into a vector of size O1, and then
201078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                // both need to be shuffled together.
2011abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth                ESContrib = (int) TTI->getVectorInstrCost(
201278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                    Instruction::InsertElement, Ty1, 0);
201378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
201478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                                VTy, Ty1);
201578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              } else {
201678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                Type *TyBig = Ty1, *TySmall = Ty2;
201778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements())
201878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                  std::swap(TyBig, TySmall);
201978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
202078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
202178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                               VTy, TyBig);
202278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                if (TyBig != TySmall)
202378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                  ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
202478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                                                  TyBig, TySmall);
202578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              }
202678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel
202778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              DEBUG(if (DebugPairSelection) dbgs() << "\tcost {"
202878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                     << *O1 << " <-> " << *O2 << "} = " <<
202978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel                     ESContrib << "\n");
203078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              EffSize -= ESContrib;
203178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel              IncomingPairs.insert(VP);
203286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel            }
203386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel          }
203465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        }
20354387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel
20364387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel        if (!HasNontrivialInsts) {
20374387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel          DEBUG(if (DebugPairSelection) dbgs() <<
20384387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel                "\tNo non-trivial instructions in tree;"
20394387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel                " override to zero effective size\n");
20404387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel          EffSize = 0;
20414387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel        }
204265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      } else {
204365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel        for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
204465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel             E = PrunedTree.end(); S != E; ++S)
204565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel          EffSize += (int) getDepthFactor(S->first);
204665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      }
2047de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2048de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DEBUG(if (DebugPairSelection)
2049de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             dbgs() << "BBV: found pruned Tree for pair {"
2050de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             << *J->first << " <-> " << *J->second << "} of depth " <<
2051de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             MaxDepth << " and size " << PrunedTree.size() <<
2052de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            " (effective size: " << EffSize << ")\n");
2053abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth      if (((TTI && !UseChainDepthWithTI) ||
205478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel            MaxDepth >= Config.ReqChainDepth) &&
205565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel          EffSize > 0 && EffSize > BestEffSize) {
2056de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        BestMaxDepth = MaxDepth;
2057de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        BestEffSize = EffSize;
2058de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        BestTree = PrunedTree;
2059de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
2060de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
2061de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2062de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2063de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // Given the list of candidate pairs, this function selects those
2064de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // that will be fused into vector instructions.
2065de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::choosePairs(
2066de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<Value *, Value *> &CandidatePairs,
2067b1a82589339fed148c12b052d30861a539552f1aHal Finkel                      DenseSet<ValuePair> &CandidatePairsSet,
206865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                      DenseMap<ValuePair, int> &CandidatePairCostSavings,
2069de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::vector<Value *> &PairableInsts,
207086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                      DenseSet<ValuePair> &FixedOrderPairs,
207186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                      DenseMap<VPPair, unsigned> &PairConnectionTypes,
2072de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairs,
207386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                      std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
2074de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseSet<ValuePair> &PairableInstUsers,
2075de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      DenseMap<Value *, Value *>& ChosenPairs) {
2076bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng    bool UseCycleCheck =
2077bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng     CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
2078de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    std::multimap<ValuePair, ValuePair> PairableInstUserMap;
2079da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel    DenseSet<VPPair> PairableInstUserPairSet;
2080de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (std::vector<Value *>::iterator I = PairableInsts.begin(),
2081de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel         E = PairableInsts.end(); I != E; ++I) {
2082de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // The number of possible pairings for this variable:
2083de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      size_t NumChoices = CandidatePairs.count(*I);
2084de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (!NumChoices) continue;
2085de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2086de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I);
2087de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2088de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // The best pair to choose and its tree:
208965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      size_t BestMaxDepth = 0;
209065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel      int BestEffSize = 0;
2091de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DenseSet<ValuePair> BestTree;
2092b1a82589339fed148c12b052d30861a539552f1aHal Finkel      findBestTreeFor(CandidatePairs, CandidatePairsSet,
2093b1a82589339fed148c12b052d30861a539552f1aHal Finkel                      CandidatePairCostSavings,
209486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                      PairableInsts, FixedOrderPairs, PairConnectionTypes,
209586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel                      ConnectedPairs, ConnectedPairDeps,
2096da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                      PairableInstUsers, PairableInstUserMap,
2097da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel                      PairableInstUserPairSet, ChosenPairs,
2098de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      BestTree, BestMaxDepth, BestEffSize, ChoiceRange,
2099de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                      UseCycleCheck);
2100de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // A tree has been chosen (or not) at this point. If no tree was
2102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // chosen, then this instruction, I, cannot be paired (and is no longer
2103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // considered).
2104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DEBUG(if (BestTree.size() > 0)
2106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              dbgs() << "BBV: selected pairs in the best tree for: "
2107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     << *cast<Instruction>(*I) << "\n");
2108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      for (DenseSet<ValuePair>::iterator S = BestTree.begin(),
2110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel           SE2 = BestTree.end(); S != SE2; ++S) {
2111de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // Insert the members of this tree into the list of chosen pairs.
2112de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        ChosenPairs.insert(ValuePair(S->first, S->second));
2113de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " <<
2114de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel               *S->second << "\n");
2115de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2116de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // Remove all candidate pairs that have values in the chosen tree.
2117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        for (std::multimap<Value *, Value *>::iterator K =
2118de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel               CandidatePairs.begin(); K != CandidatePairs.end();) {
2119de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          if (K->first == S->first || K->second == S->first ||
2120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              K->second == S->second || K->first == S->second) {
2121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            // Don't remove the actual pair chosen so that it can be used
2122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            // in subsequent tree selections.
2123b1a82589339fed148c12b052d30861a539552f1aHal Finkel            if (!(K->first == S->first && K->second == S->second)) {
2124b1a82589339fed148c12b052d30861a539552f1aHal Finkel              CandidatePairsSet.erase(*K);
2125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              CandidatePairs.erase(K++);
2126b1a82589339fed148c12b052d30861a539552f1aHal Finkel            } else
2127de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel              ++K;
2128de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          } else {
2129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel            ++K;
2130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          }
2131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        }
2132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
2133de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
2134de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2135de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n");
2136de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2137de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  std::string getReplacementName(Instruction *I, bool IsInput, unsigned o,
2139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     unsigned n = 0) {
2140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    if (!I->hasName())
2141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      return "";
2142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) +
2144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             (n > 0 ? "." + utostr(n) : "")).str();
2145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2147de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // Returns the value that is to be used as the pointer input to the vector
2148de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // instruction that fuses I with J.
2149de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
2150202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel                     Instruction *I, Instruction *J, unsigned o) {
2151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Value *IPtr, *JPtr;
215265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel    unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
2153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    int64_t OffsetInElmts;
2154282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel
2155202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel    // Note: the analysis might fail here, that is why the pair order has
2156282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel    // been precomputed (OffsetInElmts must be unused here).
2157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
215865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel                          IAddressSpace, JAddressSpace,
215993f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel                          OffsetInElmts, false);
2160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // The pointer value is taken to be the one with the lowest offset.
2162202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel    Value *VPtr = IPtr;
2163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
216464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType();
216564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType();
216664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Type *VArgPtrType = PointerType::get(VArgType,
2168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      cast<PointerType>(IPtr->getType())->getAddressSpace());
2169de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
2170202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel                        /* insert before */ I);
2171de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J,
217464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                     unsigned MaskOffset, unsigned NumInElem,
217564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                     unsigned NumInElem1, unsigned IdxOffset,
217664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                     std::vector<Constant*> &Mask) {
217764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements();
217864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    for (unsigned v = 0; v < NumElem1; ++v) {
2179de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
2180de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (m < 0) {
2181de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context));
2182de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      } else {
2183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        unsigned mm = m + (int) IdxOffset;
218464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        if (m >= (int) NumInElem1)
2185de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          mm += (int) NumInElem;
2186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        Mask[v+MaskOffset] =
2188de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          ConstantInt::get(Type::getInt32Ty(Context), mm);
2189de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
2190de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
2191de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2192de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2193de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // Returns the value that is to be used as the vector-shuffle mask to the
2194de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // vector instruction that fuses I with J.
2195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context,
2196de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *I, Instruction *J) {
2197de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // This is the shuffle mask. We need to append the second
2198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // mask to the first, and the numbers need to be adjusted.
2199de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
220064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *ArgTypeI = I->getType();
220164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *ArgTypeJ = J->getType();
220264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
220364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
220464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements();
2205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2206de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Get the total number of elements in the fused vector type.
2207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // By definition, this must equal the number of elements in
2208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // the final mask.
2209de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    unsigned NumElem = cast<VectorType>(VArgType)->getNumElements();
2210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    std::vector<Constant*> Mask(NumElem);
2211de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
221264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *OpTypeI = I->getOperand(0)->getType();
221364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements();
221464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *OpTypeJ = J->getOperand(0)->getType();
221564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements();
221664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
221764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // The fused vector will be:
221864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // -----------------------------------------------------
221964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ |
222064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // -----------------------------------------------------
222164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // from which we'll extract NumElem total elements (where the first NumElemI
222264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // of them come from the mask in I and the remainder come from the mask
222364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // in J.
2224de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2225de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // For the mask from the first pair...
222664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    fillNewShuffleMask(Context, I, 0,        NumInElemJ, NumInElemI,
222764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                       0,          Mask);
2228de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2229de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // For the mask from the second pair...
223064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ,
223164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                       NumInElemI, Mask);
2232de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2233de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    return ConstantVector::get(Mask);
2234de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2235de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
223664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel  bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I,
223764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                  Instruction *J, unsigned o, Value *&LOp,
223864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                  unsigned numElemL,
223964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                  Type *ArgTypeL, Type *ArgTypeH,
224072465ea23d010507d3746adc126d719005981e05Hal Finkel                                  bool IBeforeJ, unsigned IdxOff) {
224164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    bool ExpandedIEChain = false;
224264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) {
224364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      // If we have a pure insertelement chain, then this can be rewritten
224464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      // into a chain that directly builds the larger type.
2245b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel      if (isPureIEChain(LIE)) {
224664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        SmallVector<Value *, 8> VectElemts(numElemL,
224764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          UndefValue::get(ArgTypeL->getScalarType()));
224864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        InsertElementInst *LIENext = LIE;
224964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        do {
225064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          unsigned Idx =
225164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            cast<ConstantInt>(LIENext->getOperand(2))->getSExtValue();
225264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          VectElemts[Idx] = LIENext->getOperand(1);
225364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        } while ((LIENext =
225464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                   dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
225564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
225664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        LIENext = 0;
225764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Value *LIEPrev = UndefValue::get(ArgTypeH);
225864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        for (unsigned i = 0; i < numElemL; ++i) {
225964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          if (isa<UndefValue>(VectElemts[i])) continue;
226064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i],
226164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                             ConstantInt::get(Type::getInt32Ty(Context),
226264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                              i + IdxOff),
226372465ea23d010507d3746adc126d719005981e05Hal Finkel                             getReplacementName(IBeforeJ ? I : J,
226472465ea23d010507d3746adc126d719005981e05Hal Finkel                                                true, o, i+1));
226572465ea23d010507d3746adc126d719005981e05Hal Finkel          LIENext->insertBefore(IBeforeJ ? J : I);
226664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          LIEPrev = LIENext;
226764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
226864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
226964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH);
227064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        ExpandedIEChain = true;
227164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      }
227264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    }
227364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
227464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    return ExpandedIEChain;
227564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel  }
227664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
2277de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // Returns the value to be used as the specified operand of the vector
2278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // instruction that fuses I with J.
2279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
228072465ea23d010507d3746adc126d719005981e05Hal Finkel                     Instruction *J, unsigned o, bool IBeforeJ) {
2281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
2282de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
2283de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
228464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // Compute the fused vector type for this operand
228564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *ArgTypeI = I->getOperand(o)->getType();
228664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *ArgTypeJ = J->getOperand(o)->getType();
228764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2289de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Instruction *L = I, *H = J;
229064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
2291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
229264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    unsigned numElemL;
229364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    if (ArgTypeL->isVectorTy())
229464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      numElemL = cast<VectorType>(ArgTypeL)->getNumElements();
229564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    else
229664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      numElemL = 1;
2297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
229864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    unsigned numElemH;
229964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    if (ArgTypeH->isVectorTy())
230064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      numElemH = cast<VectorType>(ArgTypeH)->getNumElements();
230164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    else
230264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      numElemH = 1;
230364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
230464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Value *LOp = L->getOperand(o);
230564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Value *HOp = H->getOperand(o);
230664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    unsigned numElem = VArgType->getNumElements();
230764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
230864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // First, we check if we can reuse the "original" vector outputs (if these
230964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // exist). We might need a shuffle.
231064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    ExtractElementInst *LEE = dyn_cast<ExtractElementInst>(LOp);
231164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    ExtractElementInst *HEE = dyn_cast<ExtractElementInst>(HOp);
231264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    ShuffleVectorInst *LSV = dyn_cast<ShuffleVectorInst>(LOp);
231364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    ShuffleVectorInst *HSV = dyn_cast<ShuffleVectorInst>(HOp);
231464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
231564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // FIXME: If we're fusing shuffle instructions, then we can't apply this
231664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // optimization. The input vectors to the shuffle might be a different
231764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // length from the shuffle outputs. Unfortunately, the replacement
231864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // shuffle mask has already been formed, and the mask entries are sensitive
231964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    // to the sizes of the inputs.
232064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    bool IsSizeChangeShuffle =
232164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      isa<ShuffleVectorInst>(L) &&
232264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        (LOp->getType() != L->getType() || HOp->getType() != H->getType());
232364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
232464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) {
232564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      // We can have at most two unique vector inputs.
232664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      bool CanUseInputs = true;
232764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      Value *I1, *I2 = 0;
232864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      if (LEE) {
232964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        I1 = LEE->getOperand(0);
233064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      } else {
233164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        I1 = LSV->getOperand(0);
233264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        I2 = LSV->getOperand(1);
233364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        if (I2 == I1 || isa<UndefValue>(I2))
233464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          I2 = 0;
233564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      }
233664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
233764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      if (HEE) {
233864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Value *I3 = HEE->getOperand(0);
233964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        if (!I2 && I3 != I1)
234064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          I2 = I3;
234164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        else if (I3 != I1 && I3 != I2)
234264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          CanUseInputs = false;
234364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      } else {
234464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Value *I3 = HSV->getOperand(0);
234564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        if (!I2 && I3 != I1)
234664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          I2 = I3;
234764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        else if (I3 != I1 && I3 != I2)
234864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          CanUseInputs = false;
234964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
235064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        if (CanUseInputs) {
235164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Value *I4 = HSV->getOperand(1);
235264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          if (!isa<UndefValue>(I4)) {
235364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            if (!I2 && I4 != I1)
235464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              I2 = I4;
235564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            else if (I4 != I1 && I4 != I2)
235664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              CanUseInputs = false;
235764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          }
235864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
235964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      }
236064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
236164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      if (CanUseInputs) {
236264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        unsigned LOpElem =
236364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType())
236464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            ->getNumElements();
236564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        unsigned HOpElem =
236664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType())
236764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            ->getNumElements();
236864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
236964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // We have one or two input vectors. We need to map each index of the
237064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // operands to the index of the original vector.
237164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        SmallVector<std::pair<int, int>, 8>  II(numElem);
237264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        for (unsigned i = 0; i < numElemL; ++i) {
237364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          int Idx, INum;
237464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          if (LEE) {
237564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Idx =
237664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              cast<ConstantInt>(LEE->getOperand(1))->getSExtValue();
237764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            INum = LEE->getOperand(0) == I1 ? 0 : 1;
237864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          } else {
237964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Idx = LSV->getMaskValue(i);
238064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            if (Idx < (int) LOpElem) {
238164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              INum = LSV->getOperand(0) == I1 ? 0 : 1;
238264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            } else {
238364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              Idx -= LOpElem;
238464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              INum = LSV->getOperand(1) == I1 ? 0 : 1;
238564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            }
238664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          }
238764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
238864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          II[i] = std::pair<int, int>(Idx, INum);
238964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
239064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        for (unsigned i = 0; i < numElemH; ++i) {
239164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          int Idx, INum;
239264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          if (HEE) {
239364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Idx =
239464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              cast<ConstantInt>(HEE->getOperand(1))->getSExtValue();
239564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            INum = HEE->getOperand(0) == I1 ? 0 : 1;
239664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          } else {
239764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Idx = HSV->getMaskValue(i);
239864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            if (Idx < (int) HOpElem) {
239964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              INum = HSV->getOperand(0) == I1 ? 0 : 1;
240064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            } else {
240164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              Idx -= HOpElem;
240264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              INum = HSV->getOperand(1) == I1 ? 0 : 1;
240364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            }
240464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          }
240564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
240664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          II[i + numElemL] = std::pair<int, int>(Idx, INum);
240764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
240864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
240964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // We now have an array which tells us from which index of which
241064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // input vector each element of the operand comes.
241164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        VectorType *I1T = cast<VectorType>(I1->getType());
241264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        unsigned I1Elem = I1T->getNumElements();
241364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
241464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        if (!I2) {
241564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          // In this case there is only one underlying vector input. Check for
241664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          // the trivial case where we can use the input directly.
241764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          if (I1Elem == numElem) {
241864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            bool ElemInOrder = true;
241964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            for (unsigned i = 0; i < numElem; ++i) {
242064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              if (II[i].first != (int) i && II[i].first != -1) {
242164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                ElemInOrder = false;
242264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                break;
242364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              }
242464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            }
242564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
242664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            if (ElemInOrder)
242764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              return I1;
242864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          }
242964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
243064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          // A shuffle is needed.
243164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          std::vector<Constant *> Mask(numElem);
243264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          for (unsigned i = 0; i < numElem; ++i) {
243364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            int Idx = II[i].first;
243464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            if (Idx == -1)
243564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              Mask[i] = UndefValue::get(Type::getInt32Ty(Context));
243664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            else
243764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel              Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
243864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          }
243964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
244064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Instruction *S =
244164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            new ShuffleVectorInst(I1, UndefValue::get(I1T),
244264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                  ConstantVector::get(Mask),
244372465ea23d010507d3746adc126d719005981e05Hal Finkel                                  getReplacementName(IBeforeJ ? I : J,
244472465ea23d010507d3746adc126d719005981e05Hal Finkel                                                     true, o));
244572465ea23d010507d3746adc126d719005981e05Hal Finkel          S->insertBefore(IBeforeJ ? J : I);
244664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          return S;
244764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
244864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
244964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        VectorType *I2T = cast<VectorType>(I2->getType());
245064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        unsigned I2Elem = I2T->getNumElements();
245164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
245264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // This input comes from two distinct vectors. The first step is to
245364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // make sure that both vectors are the same length. If not, the
245464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // smaller one will need to grow before they can be shuffled together.
245564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        if (I1Elem < I2Elem) {
245664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          std::vector<Constant *> Mask(I2Elem);
245764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          unsigned v = 0;
245864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          for (; v < I1Elem; ++v)
245964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
246064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          for (; v < I2Elem; ++v)
246164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
246264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
246364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Instruction *NewI1 =
246464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            new ShuffleVectorInst(I1, UndefValue::get(I1T),
246564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                  ConstantVector::get(Mask),
246672465ea23d010507d3746adc126d719005981e05Hal Finkel                                  getReplacementName(IBeforeJ ? I : J,
246772465ea23d010507d3746adc126d719005981e05Hal Finkel                                                     true, o, 1));
246872465ea23d010507d3746adc126d719005981e05Hal Finkel          NewI1->insertBefore(IBeforeJ ? J : I);
246964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          I1 = NewI1;
247064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          I1T = I2T;
247164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          I1Elem = I2Elem;
247264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        } else if (I1Elem > I2Elem) {
247364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          std::vector<Constant *> Mask(I1Elem);
247464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          unsigned v = 0;
247564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          for (; v < I2Elem; ++v)
247664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
247764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          for (; v < I1Elem; ++v)
247864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
247964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
248064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Instruction *NewI2 =
248164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            new ShuffleVectorInst(I2, UndefValue::get(I2T),
248264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                  ConstantVector::get(Mask),
248372465ea23d010507d3746adc126d719005981e05Hal Finkel                                  getReplacementName(IBeforeJ ? I : J,
248472465ea23d010507d3746adc126d719005981e05Hal Finkel                                                     true, o, 1));
248572465ea23d010507d3746adc126d719005981e05Hal Finkel          NewI2->insertBefore(IBeforeJ ? J : I);
248664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          I2 = NewI2;
248764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          I2T = I1T;
248864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          I2Elem = I1Elem;
248964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
249064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
249164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // Now that both I1 and I2 are the same length we can shuffle them
249264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // together (and use the result).
249364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        std::vector<Constant *> Mask(numElem);
249464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        for (unsigned v = 0; v < numElem; ++v) {
249564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          if (II[v].first == -1) {
249664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
249764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          } else {
249864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            int Idx = II[v].first + II[v].second * I1Elem;
249964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
250064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          }
250164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
250264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
250364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Instruction *NewOp =
250464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask),
250572465ea23d010507d3746adc126d719005981e05Hal Finkel                                getReplacementName(IBeforeJ ? I : J, true, o));
250672465ea23d010507d3746adc126d719005981e05Hal Finkel        NewOp->insertBefore(IBeforeJ ? J : I);
250764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        return NewOp;
250864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      }
2509de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
2510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
251164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Type *ArgType = ArgTypeL;
251264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    if (numElemL < numElemH) {
251364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH,
251472465ea23d010507d3746adc126d719005981e05Hal Finkel                                         ArgTypeL, VArgType, IBeforeJ, 1)) {
251564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // This is another short-circuit case: we're combining a scalar into
251664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // a vector that is formed by an IE chain. We've just expanded the IE
251764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // chain, now insert the scalar and we're done.
251864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
251964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Instruction *S = InsertElementInst::Create(HOp, LOp, CV0,
252072465ea23d010507d3746adc126d719005981e05Hal Finkel                           getReplacementName(IBeforeJ ? I : J, true, o));
252172465ea23d010507d3746adc126d719005981e05Hal Finkel        S->insertBefore(IBeforeJ ? J : I);
252264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        return S;
252364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL,
252472465ea23d010507d3746adc126d719005981e05Hal Finkel                                ArgTypeH, IBeforeJ)) {
252564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // The two vector inputs to the shuffle must be the same length,
252664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // so extend the smaller vector to be the same length as the larger one.
252764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Instruction *NLOp;
252864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        if (numElemL > 1) {
252964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
253064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          std::vector<Constant *> Mask(numElemH);
253164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          unsigned v = 0;
253264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          for (; v < numElemL; ++v)
253364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
253464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          for (; v < numElemH; ++v)
253564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
253664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
253764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL),
253864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                       ConstantVector::get(Mask),
253972465ea23d010507d3746adc126d719005981e05Hal Finkel                                       getReplacementName(IBeforeJ ? I : J,
254072465ea23d010507d3746adc126d719005981e05Hal Finkel                                                          true, o, 1));
254164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        } else {
254264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0,
254372465ea23d010507d3746adc126d719005981e05Hal Finkel                                           getReplacementName(IBeforeJ ? I : J,
254472465ea23d010507d3746adc126d719005981e05Hal Finkel                                                              true, o, 1));
254564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
254664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
254772465ea23d010507d3746adc126d719005981e05Hal Finkel        NLOp->insertBefore(IBeforeJ ? J : I);
254864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        LOp = NLOp;
254964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      }
255064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
255164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      ArgType = ArgTypeH;
255264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    } else if (numElemL > numElemH) {
255364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL,
255472465ea23d010507d3746adc126d719005981e05Hal Finkel                                         ArgTypeH, VArgType, IBeforeJ)) {
255564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Instruction *S =
255664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          InsertElementInst::Create(LOp, HOp,
255764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                    ConstantInt::get(Type::getInt32Ty(Context),
255864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                                     numElemL),
255972465ea23d010507d3746adc126d719005981e05Hal Finkel                                    getReplacementName(IBeforeJ ? I : J,
256072465ea23d010507d3746adc126d719005981e05Hal Finkel                                                       true, o));
256172465ea23d010507d3746adc126d719005981e05Hal Finkel        S->insertBefore(IBeforeJ ? J : I);
256264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        return S;
256364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH,
256472465ea23d010507d3746adc126d719005981e05Hal Finkel                                ArgTypeL, IBeforeJ)) {
256564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Instruction *NHOp;
256664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        if (numElemH > 1) {
256764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          std::vector<Constant *> Mask(numElemL);
256864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          unsigned v = 0;
256964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          for (; v < numElemH; ++v)
257064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
257164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          for (; v < numElemL; ++v)
257264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel            Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
257364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
257464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH),
257564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                       ConstantVector::get(Mask),
257672465ea23d010507d3746adc126d719005981e05Hal Finkel                                       getReplacementName(IBeforeJ ? I : J,
257772465ea23d010507d3746adc126d719005981e05Hal Finkel                                                          true, o, 1));
257864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        } else {
257964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0,
258072465ea23d010507d3746adc126d719005981e05Hal Finkel                                           getReplacementName(IBeforeJ ? I : J,
258172465ea23d010507d3746adc126d719005981e05Hal Finkel                                                              true, o, 1));
258264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
258364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
258472465ea23d010507d3746adc126d719005981e05Hal Finkel        NHOp->insertBefore(IBeforeJ ? J : I);
258564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        HOp = NHOp;
2586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
258764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    }
2588de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
258964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    if (ArgType->isVectorTy()) {
259064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      unsigned numElem = cast<VectorType>(VArgType)->getNumElements();
259164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      std::vector<Constant*> Mask(numElem);
259264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      for (unsigned v = 0; v < numElem; ++v) {
259364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        unsigned Idx = v;
259464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // If the low vector was expanded, we need to skip the extra
259564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        // undefined entries.
259664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        if (v >= numElemL && numElemH > numElemL)
259764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Idx += (numElemH - numElemL);
259864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx);
259964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      }
2600de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
260164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      Instruction *BV = new ShuffleVectorInst(LOp, HOp,
260272465ea23d010507d3746adc126d719005981e05Hal Finkel                          ConstantVector::get(Mask),
260372465ea23d010507d3746adc126d719005981e05Hal Finkel                          getReplacementName(IBeforeJ ? I : J, true, o));
260472465ea23d010507d3746adc126d719005981e05Hal Finkel      BV->insertBefore(IBeforeJ ? J : I);
2605de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      return BV;
2606de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
2607de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2608de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    Instruction *BV1 = InsertElementInst::Create(
260964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                          UndefValue::get(VArgType), LOp, CV0,
261072465ea23d010507d3746adc126d719005981e05Hal Finkel                                          getReplacementName(IBeforeJ ? I : J,
261172465ea23d010507d3746adc126d719005981e05Hal Finkel                                                             true, o, 1));
261272465ea23d010507d3746adc126d719005981e05Hal Finkel    BV1->insertBefore(IBeforeJ ? J : I);
261364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel    Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1,
261472465ea23d010507d3746adc126d719005981e05Hal Finkel                                          getReplacementName(IBeforeJ ? I : J,
261572465ea23d010507d3746adc126d719005981e05Hal Finkel                                                             true, o, 2));
261672465ea23d010507d3746adc126d719005981e05Hal Finkel    BV2->insertBefore(IBeforeJ ? J : I);
2617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    return BV2;
2618de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2619de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2620de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // This function creates an array of values that will be used as the inputs
2621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // to the vector instruction that fuses I with J.
2622de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
2623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *I, Instruction *J,
262472465ea23d010507d3746adc126d719005981e05Hal Finkel                     SmallVector<Value *, 3> &ReplacedOperands,
262572465ea23d010507d3746adc126d719005981e05Hal Finkel                     bool IBeforeJ) {
2626de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    unsigned NumOperands = I->getNumOperands();
2627de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2628de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
2629de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Iterate backward so that we look at the store pointer
2630de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // first and know whether or not we need to flip the inputs.
2631de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2632de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) {
2633de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // This is the pointer for a load/store instruction.
2634202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel        ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o);
2635de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        continue;
26366173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel      } else if (isa<CallInst>(I)) {
2637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        Function *F = cast<CallInst>(I)->getCalledFunction();
2638a77728415857196035c0090f7b2749d7971811a2Hal Finkel        Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
26396173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel        if (o == NumOperands-1) {
26406173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel          BasicBlock &BB = *I->getParent();
2641bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng
26426173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel          Module *M = BB.getParent()->getParent();
264364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Type *ArgTypeI = I->getType();
264464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Type *ArgTypeJ = J->getType();
264564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
2646bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng
2647a77728415857196035c0090f7b2749d7971811a2Hal Finkel          ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType);
26486173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel          continue;
26496173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel        } else if (IID == Intrinsic::powi && o == 1) {
26506173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel          // The second argument of powi is a single integer and we've already
26516173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel          // checked that both arguments are equal. As a result, we just keep
26526173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel          // I's second argument.
26536173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel          ReplacedOperands[o] = I->getOperand(o);
26546173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel          continue;
26556173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel        }
2656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) {
2657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J);
2658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        continue;
2659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
2660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
266172465ea23d010507d3746adc126d719005981e05Hal Finkel      ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ);
2662de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
2663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // This function creates two values that represent the outputs of the
2666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // original I and J instructions. These are generally vector shuffles
2667de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // or extracts. In many cases, these will end up being unused and, thus,
2668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // eliminated by later passes.
2669de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
2670de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *J, Instruction *K,
2671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *&InsertionPt,
2672202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel                     Instruction *&K1, Instruction *&K2) {
2673de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    if (isa<StoreInst>(I)) {
2674de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      AA->replaceWithNewValue(I, K);
2675de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      AA->replaceWithNewValue(J, K);
2676de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    } else {
2677de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      Type *IType = I->getType();
267864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      Type *JType = J->getType();
267964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
268064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      VectorType *VType = getVecTypeForPair(IType, JType);
268164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      unsigned numElem = VType->getNumElements();
268264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
268364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      unsigned numElemI, numElemJ;
268464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      if (IType->isVectorTy())
268564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        numElemI = cast<VectorType>(IType)->getNumElements();
268664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      else
268764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        numElemI = 1;
268864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
268964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      if (JType->isVectorTy())
269064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        numElemJ = cast<VectorType>(JType)->getNumElements();
269164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      else
269264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        numElemJ = 1;
2693de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2694de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (IType->isVectorTy()) {
269564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        std::vector<Constant*> Mask1(numElemI), Mask2(numElemI);
269664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        for (unsigned v = 0; v < numElemI; ++v) {
269764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
269864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v);
269964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
2700de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
270164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
2702202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel                                   ConstantVector::get( Mask1),
270364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                   getReplacementName(K, false, 1));
2704de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      } else {
270564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
2706202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel        K1 = ExtractElementInst::Create(K, CV0,
2707de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                                          getReplacementName(K, false, 1));
270864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      }
270964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
271064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      if (JType->isVectorTy()) {
271164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ);
271264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        for (unsigned v = 0; v < numElemJ; ++v) {
271364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
271464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel          Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v);
271564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        }
271664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel
271764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
2718202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel                                   ConstantVector::get( Mask2),
271964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel                                   getReplacementName(K, false, 2));
272064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel      } else {
272164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel        Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1);
2722202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel        K2 = ExtractElementInst::Create(K, CV1,
2723de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                                          getReplacementName(K, false, 2));
2724de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
2725de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2726de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      K1->insertAfter(K);
2727de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      K2->insertAfter(K1);
2728de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      InsertionPt = K2;
2729de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
2730de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2731de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2732de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // Move all uses of the function I (including pairing-induced uses) after J.
2733de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB,
27342f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                     DenseSet<ValuePair> &LoadMoveSetPairs,
2735de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *I, Instruction *J) {
2736de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Skip to the first instruction past I.
2737ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer    BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
2738de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2739de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DenseSet<Value *> Users;
2740de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    AliasSetTracker WriteSet(*AA);
2741de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (; cast<Instruction>(L) != J; ++L)
27422f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel      (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs);
2743de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2744de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    assert(cast<Instruction>(L) == J &&
2745de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      "Tracking has not proceeded far enough to check for dependencies");
2746de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // If J is now in the use set of I, then trackUsesOfI will return true
2747de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // and we have a dependency cycle (and the fusing operation must abort).
27482f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel    return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSetPairs);
2749de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2750de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2751de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // Move all uses of the function I (including pairing-induced uses) after J.
2752de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB,
27532f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                     DenseSet<ValuePair> &LoadMoveSetPairs,
2754de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *&InsertionPt,
2755de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *I, Instruction *J) {
2756de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Skip to the first instruction past I.
2757ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer    BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
2758de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2759de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DenseSet<Value *> Users;
2760de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    AliasSetTracker WriteSet(*AA);
2761de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (; cast<Instruction>(L) != J;) {
27622f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel      if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) {
2763de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // Move this instruction
2764de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        Instruction *InstToMove = L; ++L;
2765de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2766de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        DEBUG(dbgs() << "BBV: moving: " << *InstToMove <<
2767de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                        " to after " << *InsertionPt << "\n");
2768de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        InstToMove->removeFromParent();
2769de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        InstToMove->insertAfter(InsertionPt);
2770de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        InsertionPt = InstToMove;
2771de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      } else {
2772de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        ++L;
2773de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
2774de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
2775de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2776de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2777de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // Collect all load instruction that are in the move set of a given first
2778de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // pair member.  These loads depend on the first instruction, I, and so need
2779de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // to be moved after J (the second instruction) when the pair is fused.
2780de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB,
2781de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     DenseMap<Value *, Value *> &ChosenPairs,
2782de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     std::multimap<Value *, Value *> &LoadMoveSet,
27832f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                     DenseSet<ValuePair> &LoadMoveSetPairs,
2784de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     Instruction *I) {
2785de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Skip to the first instruction past I.
2786ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer    BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
2787de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2788de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DenseSet<Value *> Users;
2789de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    AliasSetTracker WriteSet(*AA);
2790de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2791de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // Note: We cannot end the loop when we reach J because J could be moved
2792de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // farther down the use chain by another instruction pairing. Also, J
2793de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // could be before I if this is an inverted input.
2794de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) {
2795de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (trackUsesOfI(Users, WriteSet, I, L)) {
27962f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel        if (L->mayReadFromMemory()) {
2797de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          LoadMoveSet.insert(ValuePair(L, I));
27982f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel          LoadMoveSetPairs.insert(ValuePair(L, I));
27992f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel        }
2800de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
2801de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
2802de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2803de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2804de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // In cases where both load/stores and the computation of their pointers
2805de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // are chosen for vectorization, we can end up in a situation where the
2806de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // aliasing analysis starts returning different query results as the
2807de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // process of fusing instruction pairs continues. Because the algorithm
2808de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // relies on finding the same use trees here as were found earlier, we'll
2809de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // need to precompute the necessary aliasing information here and then
2810de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  // manually update it during the fusion process.
2811de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::collectLoadMoveSet(BasicBlock &BB,
2812de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     std::vector<Value *> &PairableInsts,
2813de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     DenseMap<Value *, Value *> &ChosenPairs,
28142f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                     std::multimap<Value *, Value *> &LoadMoveSet,
28152f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                     DenseSet<ValuePair> &LoadMoveSetPairs) {
2816de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
2817de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel         PIE = PairableInsts.end(); PI != PIE; ++PI) {
2818de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
2819de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (P == ChosenPairs.end()) continue;
2820de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2821de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      Instruction *I = cast<Instruction>(P->first);
28222f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel      collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet,
28232f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                             LoadMoveSetPairs, I);
2824de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
2825de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
2826de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2827ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel  // When the first instruction in each pair is cloned, it will inherit its
2828ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel  // parent's metadata. This metadata must be combined with that of the other
2829ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel  // instruction in a safe way.
2830ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel  void BBVectorize::combineMetadata(Instruction *K, const Instruction *J) {
2831ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel    SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata;
2832ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel    K->getAllMetadataOtherThanDebugLoc(Metadata);
2833ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel    for (unsigned i = 0, n = Metadata.size(); i < n; ++i) {
2834ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel      unsigned Kind = Metadata[i].first;
2835ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel      MDNode *JMD = J->getMetadata(Kind);
2836ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel      MDNode *KMD = Metadata[i].second;
2837ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel
2838ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel      switch (Kind) {
2839ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel      default:
2840ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel        K->setMetadata(Kind, 0); // Remove unknown metadata
2841ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel        break;
2842ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel      case LLVMContext::MD_tbaa:
2843ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel        K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
2844ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel        break;
2845ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel      case LLVMContext::MD_fpmath:
2846ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel        K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
2847ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel        break;
2848ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel      }
2849ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel    }
2850ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel  }
2851ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel
// This function fuses the chosen instruction pairs into vector instructions,
// taking care to preserve any needed scalar outputs, and then reorders the
// remaining instructions as needed (users of the first member of the pair
// need to be moved to after the location of the second member of the pair
// because the vector instruction is inserted in the location of the pair's
// second member).
2858de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  void BBVectorize::fuseChosenPairs(BasicBlock &BB,
2859de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel                     std::vector<Value *> &PairableInsts,
2860a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel                     DenseMap<Value *, Value *> &ChosenPairs,
286172465ea23d010507d3746adc126d719005981e05Hal Finkel                     DenseSet<ValuePair> &FixedOrderPairs,
286272465ea23d010507d3746adc126d719005981e05Hal Finkel                     DenseMap<VPPair, unsigned> &PairConnectionTypes,
286372465ea23d010507d3746adc126d719005981e05Hal Finkel                     std::multimap<ValuePair, ValuePair> &ConnectedPairs,
286472465ea23d010507d3746adc126d719005981e05Hal Finkel                     std::multimap<ValuePair, ValuePair> &ConnectedPairDeps) {
2865de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    LLVMContext& Context = BB.getContext();
2866de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2867de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // During the vectorization process, the order of the pairs to be fused
2868de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // could be flipped. So we'll add each pair, flipped, into the ChosenPairs
2869de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    // list. After a pair is fused, the flipped pair is removed from the list.
287072465ea23d010507d3746adc126d719005981e05Hal Finkel    DenseSet<ValuePair> FlippedPairs;
2871de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(),
2872de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel         E = ChosenPairs.end(); P != E; ++P)
287372465ea23d010507d3746adc126d719005981e05Hal Finkel      FlippedPairs.insert(ValuePair(P->second, P->first));
287472465ea23d010507d3746adc126d719005981e05Hal Finkel    for (DenseSet<ValuePair>::iterator P = FlippedPairs.begin(),
2875de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel         E = FlippedPairs.end(); P != E; ++P)
2876de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      ChosenPairs.insert(*P);
2877de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2878de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    std::multimap<Value *, Value *> LoadMoveSet;
28792f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel    DenseSet<ValuePair> LoadMoveSetPairs;
28802f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel    collectLoadMoveSet(BB, PairableInsts, ChosenPairs,
28812f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel                       LoadMoveSet, LoadMoveSetPairs);
2882de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2883de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
2884de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2885de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
2886de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(PI);
2887de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (P == ChosenPairs.end()) {
2888de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        ++PI;
2889de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        continue;
2890de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
2891de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2892de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (getDepthFactor(P->first) == 0) {
2893de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // These instructions are not really fused, but are tracked as though
2894de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // they are. Any case in which it would be interesting to fuse them
2895de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        // will be taken care of by InstCombine.
2896de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        --NumFusedOps;
2897de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        ++PI;
2898de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        continue;
2899de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
2900de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2901de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      Instruction *I = cast<Instruction>(P->first),
2902de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        *J = cast<Instruction>(P->second);
2903de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2904de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DEBUG(dbgs() << "BBV: fusing: " << *I <<
2905de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             " <-> " << *J << "\n");
2906de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2907de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Remove the pair and flipped pair from the list.
2908de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      DenseMap<Value *, Value *>::iterator FP = ChosenPairs.find(P->second);
2909de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      assert(FP != ChosenPairs.end() && "Flipped pair not found in list");
2910de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      ChosenPairs.erase(FP);
2911de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      ChosenPairs.erase(P);
2912de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
29132f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel      if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) {
2914de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        DEBUG(dbgs() << "BBV: fusion of: " << *I <<
2915de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel               " <-> " << *J <<
2916de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel               " aborted because of non-trivial dependency cycle\n");
2917de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        --NumFusedOps;
2918de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        ++PI;
2919de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        continue;
2920de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
2921de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2922a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel      // If the pair must have the other order, then flip it.
2923a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel      bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I));
292472465ea23d010507d3746adc126d719005981e05Hal Finkel      if (!FlipPairOrder && !FixedOrderPairs.count(ValuePair(I, J))) {
292572465ea23d010507d3746adc126d719005981e05Hal Finkel        // This pair does not have a fixed order, and so we might want to
292672465ea23d010507d3746adc126d719005981e05Hal Finkel        // flip it if that will yield fewer shuffles. We count the number
292772465ea23d010507d3746adc126d719005981e05Hal Finkel        // of dependencies connected via swaps, and those directly connected,
292872465ea23d010507d3746adc126d719005981e05Hal Finkel        // and flip the order if the number of swaps is greater.
292972465ea23d010507d3746adc126d719005981e05Hal Finkel        bool OrigOrder = true;
293072465ea23d010507d3746adc126d719005981e05Hal Finkel        VPPIteratorPair IP = ConnectedPairDeps.equal_range(ValuePair(I, J));
293172465ea23d010507d3746adc126d719005981e05Hal Finkel        if (IP.first == ConnectedPairDeps.end()) {
293272465ea23d010507d3746adc126d719005981e05Hal Finkel          IP = ConnectedPairDeps.equal_range(ValuePair(J, I));
293372465ea23d010507d3746adc126d719005981e05Hal Finkel          OrigOrder = false;
293472465ea23d010507d3746adc126d719005981e05Hal Finkel        }
293572465ea23d010507d3746adc126d719005981e05Hal Finkel
293672465ea23d010507d3746adc126d719005981e05Hal Finkel        if (IP.first != ConnectedPairDeps.end()) {
293772465ea23d010507d3746adc126d719005981e05Hal Finkel          unsigned NumDepsDirect = 0, NumDepsSwap = 0;
293872465ea23d010507d3746adc126d719005981e05Hal Finkel          for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
293972465ea23d010507d3746adc126d719005981e05Hal Finkel               Q != IP.second; ++Q) {
294072465ea23d010507d3746adc126d719005981e05Hal Finkel            DenseMap<VPPair, unsigned>::iterator R =
294172465ea23d010507d3746adc126d719005981e05Hal Finkel              PairConnectionTypes.find(VPPair(Q->second, Q->first));
294272465ea23d010507d3746adc126d719005981e05Hal Finkel            assert(R != PairConnectionTypes.end() &&
294372465ea23d010507d3746adc126d719005981e05Hal Finkel                   "Cannot find pair connection type");
294472465ea23d010507d3746adc126d719005981e05Hal Finkel            if (R->second == PairConnectionDirect)
294572465ea23d010507d3746adc126d719005981e05Hal Finkel              ++NumDepsDirect;
294672465ea23d010507d3746adc126d719005981e05Hal Finkel            else if (R->second == PairConnectionSwap)
294772465ea23d010507d3746adc126d719005981e05Hal Finkel              ++NumDepsSwap;
294872465ea23d010507d3746adc126d719005981e05Hal Finkel          }
294972465ea23d010507d3746adc126d719005981e05Hal Finkel
295072465ea23d010507d3746adc126d719005981e05Hal Finkel          if (!OrigOrder)
295172465ea23d010507d3746adc126d719005981e05Hal Finkel            std::swap(NumDepsDirect, NumDepsSwap);
295272465ea23d010507d3746adc126d719005981e05Hal Finkel
295372465ea23d010507d3746adc126d719005981e05Hal Finkel          if (NumDepsSwap > NumDepsDirect) {
295472465ea23d010507d3746adc126d719005981e05Hal Finkel            FlipPairOrder = true;
295572465ea23d010507d3746adc126d719005981e05Hal Finkel            DEBUG(dbgs() << "BBV: reordering pair: " << *I <<
295672465ea23d010507d3746adc126d719005981e05Hal Finkel                            " <-> " << *J << "\n");
295772465ea23d010507d3746adc126d719005981e05Hal Finkel          }
295872465ea23d010507d3746adc126d719005981e05Hal Finkel        }
295972465ea23d010507d3746adc126d719005981e05Hal Finkel      }
2960282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel
2961202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel      Instruction *L = I, *H = J;
2962a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel      if (FlipPairOrder)
2963202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel        std::swap(H, L);
2964202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel
296572465ea23d010507d3746adc126d719005981e05Hal Finkel      // If the pair being fused uses the opposite order from that in the pair
296672465ea23d010507d3746adc126d719005981e05Hal Finkel      // connection map, then we need to flip the types.
296772465ea23d010507d3746adc126d719005981e05Hal Finkel      VPPIteratorPair IP = ConnectedPairs.equal_range(ValuePair(H, L));
296872465ea23d010507d3746adc126d719005981e05Hal Finkel      for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
296972465ea23d010507d3746adc126d719005981e05Hal Finkel           Q != IP.second; ++Q) {
297072465ea23d010507d3746adc126d719005981e05Hal Finkel        DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(*Q);
297172465ea23d010507d3746adc126d719005981e05Hal Finkel        assert(R != PairConnectionTypes.end() &&
297272465ea23d010507d3746adc126d719005981e05Hal Finkel               "Cannot find pair connection type");
297372465ea23d010507d3746adc126d719005981e05Hal Finkel        if (R->second == PairConnectionDirect)
297472465ea23d010507d3746adc126d719005981e05Hal Finkel          R->second = PairConnectionSwap;
297572465ea23d010507d3746adc126d719005981e05Hal Finkel        else if (R->second == PairConnectionSwap)
297672465ea23d010507d3746adc126d719005981e05Hal Finkel          R->second = PairConnectionDirect;
297772465ea23d010507d3746adc126d719005981e05Hal Finkel      }
297872465ea23d010507d3746adc126d719005981e05Hal Finkel
297972465ea23d010507d3746adc126d719005981e05Hal Finkel      bool LBeforeH = !FlipPairOrder;
2980de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      unsigned NumOperands = I->getNumOperands();
2981de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      SmallVector<Value *, 3> ReplacedOperands(NumOperands);
298272465ea23d010507d3746adc126d719005981e05Hal Finkel      getReplacementInputsForPair(Context, L, H, ReplacedOperands,
298372465ea23d010507d3746adc126d719005981e05Hal Finkel                                  LBeforeH);
2984de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2985de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Make a copy of the original operation, change its type to the vector
2986de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // type and replace its operands with the vector operands.
298772465ea23d010507d3746adc126d719005981e05Hal Finkel      Instruction *K = L->clone();
298872465ea23d010507d3746adc126d719005981e05Hal Finkel      if (L->hasName())
298972465ea23d010507d3746adc126d719005981e05Hal Finkel        K->takeName(L);
299072465ea23d010507d3746adc126d719005981e05Hal Finkel      else if (H->hasName())
299172465ea23d010507d3746adc126d719005981e05Hal Finkel        K->takeName(H);
2992de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
2993de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (!isa<StoreInst>(K))
2994202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel        K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
2995de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
299672465ea23d010507d3746adc126d719005981e05Hal Finkel      combineMetadata(K, H);
2997430b9079c614cd3f45015a6516590d33742cc802Hal Finkel      K->intersectOptionalDataWith(H);
2998ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel
2999de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      for (unsigned o = 0; o < NumOperands; ++o)
3000de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        K->setOperand(o, ReplacedOperands[o]);
3001de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
3002de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      K->insertAfter(J);
3003de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
3004de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Instruction insertion point:
3005de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      Instruction *InsertionPt = K;
3006de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      Instruction *K1 = 0, *K2 = 0;
3007202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel      replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
3008de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
3009de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // The use tree of the first original instruction must be moved to after
3010de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // the location of the second instruction. The entire use tree of the
3011de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // first instruction is disjoint from the input tree of the second
3012de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // (by definition), and so commutes with it.
3013de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
30142f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel      moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J);
3015de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
3016de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (!isa<StoreInst>(I)) {
301772465ea23d010507d3746adc126d719005981e05Hal Finkel        L->replaceAllUsesWith(K1);
301872465ea23d010507d3746adc126d719005981e05Hal Finkel        H->replaceAllUsesWith(K2);
301972465ea23d010507d3746adc126d719005981e05Hal Finkel        AA->replaceWithNewValue(L, K1);
302072465ea23d010507d3746adc126d719005981e05Hal Finkel        AA->replaceWithNewValue(H, K2);
3021de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
3022de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
3023de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Instructions that may read from memory may be in the load move set.
3024de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Once an instruction is fused, we no longer need its move set, and so
3025de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // the values of the map never need to be updated. However, when a load
3026de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // is fused, we need to merge the entries from both instructions in the
3027de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // pair in case those instructions were in the move set of some other
3028de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // yet-to-be-fused pair. The loads in question are the keys of the map.
3029de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (I->mayReadFromMemory()) {
3030de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        std::vector<ValuePair> NewSetMembers;
3031de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        VPIteratorPair IPairRange = LoadMoveSet.equal_range(I);
3032de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        VPIteratorPair JPairRange = LoadMoveSet.equal_range(J);
3033de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        for (std::multimap<Value *, Value *>::iterator N = IPairRange.first;
3034de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             N != IPairRange.second; ++N)
3035de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          NewSetMembers.push_back(ValuePair(K, N->second));
3036de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        for (std::multimap<Value *, Value *>::iterator N = JPairRange.first;
3037de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel             N != JPairRange.second; ++N)
3038de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          NewSetMembers.push_back(ValuePair(K, N->second));
3039de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(),
30402f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel             AE = NewSetMembers.end(); A != AE; ++A) {
3041de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel          LoadMoveSet.insert(*A);
30422f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel          LoadMoveSetPairs.insert(*A);
30432f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel        }
3044de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      }
3045de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
3046de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      // Before removing I, set the iterator to the next instruction.
3047de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      PI = llvm::next(BasicBlock::iterator(I));
3048de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      if (cast<Instruction>(PI) == J)
3049de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel        ++PI;
3050de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
3051de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      SE->forgetValue(I);
3052de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      SE->forgetValue(J);
3053de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      I->eraseFromParent();
3054de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel      J->eraseFromParent();
305572465ea23d010507d3746adc126d719005981e05Hal Finkel
305672465ea23d010507d3746adc126d719005981e05Hal Finkel      DEBUG(if (PrintAfterEveryPair) dbgs() << "BBV: block is now: \n" <<
305772465ea23d010507d3746adc126d719005981e05Hal Finkel                                               BB << "\n");
3058de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    }
3059de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
3060de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel    DEBUG(dbgs() << "BBV: final: \n" << BB << "\n");
3061de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel  }
3062de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel}
3063de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
// Pass identity and registration for BBVectorize.
// ID is the static identifier member of the pass class; bb_vectorize_name is
// the human-readable description handed to the registration macros below.
3064de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelchar BBVectorize::ID = 0;
3065de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic const char bb_vectorize_name[] = "Basic-Block Vectorization";
// Registration is bracketed by INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_END,
// both given the same argument list. BBV_NAME is the "bb-vectorize" string
// defined at the top of this file.
// NOTE(review): the two trailing `false` arguments are presumably the macro's
// CFG-only and is-analysis flags -- confirm against llvm/PassSupport.h.
3066de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
// Dependencies declared between BEGIN and END: AliasAnalysis and
// TargetTransformInfo through the _AG_ variant of the macro, DominatorTree and
// ScalarEvolution through the plain pass variant.
3067de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_AG_DEPENDENCY(AliasAnalysis)
30688bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler CarruthINITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
3069e29c19091cca58db668407dfc5dd86c70e8b3d49Hal FinkelINITIALIZE_PASS_DEPENDENCY(DominatorTree)
3070de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
3071de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
3072de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
3073bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengBasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
3074bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  return new BBVectorize(C);
3075de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel}
3076de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel
3077bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengbool
3078bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengllvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
3079bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  BBVectorize BBVectorizer(P, C);
308087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng  return BBVectorizer.vectorizeBB(BB);
308187825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng}
3082bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng
3083bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng//===----------------------------------------------------------------------===//
3084bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengVectorizeConfig::VectorizeConfig() {
3085bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  VectorBits = ::VectorBits;
3086768edf3cd037aab10391abc279f71470df8e3156Hal Finkel  VectorizeBools = !::NoBools;
308786312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng  VectorizeInts = !::NoInts;
308886312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng  VectorizeFloats = !::NoFloats;
3089f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel  VectorizePointers = !::NoPointers;
309086312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng  VectorizeCasts = !::NoCasts;
309186312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng  VectorizeMath = !::NoMath;
309286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng  VectorizeFMA = !::NoFMA;
3093fc3665c87519850f629c9565535e3be447e10addHal Finkel  VectorizeSelect = !::NoSelect;
3094e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel  VectorizeCmp = !::NoCmp;
3095f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel  VectorizeGEP = !::NoGEP;
309686312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng  VectorizeMemOps = !::NoMemOps;
3097bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  AlignedOnly = ::AlignedOnly;
3098bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  ReqChainDepth= ::ReqChainDepth;
3099bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  SearchLimit = ::SearchLimit;
3100bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
3101bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  SplatBreaksChain = ::SplatBreaksChain;
3102bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  MaxInsts = ::MaxInsts;
3103bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  MaxIter = ::MaxIter;
310464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel  Pow2LenOnly = ::Pow2LenOnly;
3105bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  NoMemOpBoost = ::NoMemOpBoost;
3106bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng  FastDep = ::FastDep;
3107bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng}
3108