BBVectorize.cpp revision eaa8f5533f9f678fe3c56aec0201a34e46eaaf54
//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a basic-block vectorization pass. The algorithm was
// inspired by that used by the Vienna MAP Vectorizer by Franchetti and Kral,
// et al. It works by looking for chains of pairable operations and then
// pairing them.
14de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel// 15de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel//===----------------------------------------------------------------------===// 16de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 17de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define BBV_NAME "bb-vectorize" 18de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#define DEBUG_TYPE BBV_NAME 19d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Transforms/Vectorize.h" 20de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseMap.h" 21de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/DenseSet.h" 22d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/ADT/STLExtras.h" 2386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel#include "llvm/ADT/SmallSet.h" 24de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/SmallVector.h" 25de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/Statistic.h" 26de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/ADT/StringExtras.h" 27de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasAnalysis.h" 28de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/AliasSetTracker.h" 29e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel#include "llvm/Analysis/Dominators.h" 30de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolution.h" 31de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ScalarEvolutionExpressions.h" 32be04929f7fd76a921540e9901f24563e51dc1219Chandler Carruth#include "llvm/Analysis/TargetTransformInfo.h" 33de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Analysis/ValueTracking.h" 340b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Constants.h" 350b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DataLayout.h" 
360b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DerivedTypes.h" 370b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Function.h" 380b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Instructions.h" 390b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IntrinsicInst.h" 400b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Intrinsics.h" 410b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/LLVMContext.h" 420b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Metadata.h" 430b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Type.h" 44d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Pass.h" 45de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/CommandLine.h" 46de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/Debug.h" 47de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include "llvm/Support/ValueHandle.h" 48d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/raw_ostream.h" 4964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel#include "llvm/Transforms/Utils/Local.h" 50de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#include <algorithm> 51de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelusing namespace llvm; 52de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 5365309660fa61a837cc05323f69c618a7d8134d56Hal Finkelstatic cl::opt<bool> 5465309660fa61a837cc05323f69c618a7d8134d56Hal FinkelIgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false), 5565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel cl::Hidden, cl::desc("Ignore target information")); 5665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 57de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 58de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), 
cl::Hidden, 59de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The required chain depth for vectorization")); 60de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 6178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkelstatic cl::opt<bool> 6278fd353d5e5daedc47ecc31b6193ca48793c249cHal FinkelUseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false), 6378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel cl::Hidden, cl::desc("Use the chain depth requirement with" 6478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel " target information")); 6578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 66de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 67de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden, 68de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The maximum search distance for instruction pairs")); 69de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 70de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 71de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden, 72de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Replicating one element to a pair breaks the chain")); 73de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 74de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 75de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelVectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden, 76de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The size of the native vector registers")); 77de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 78de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 79de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, 80de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("The maximum number of pairing 
iterations")); 81de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 8264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkelstatic cl::opt<bool> 8364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal FinkelPow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden, 8464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cl::desc("Don't try to form non-2^n-length vectors")); 8564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 86de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<unsigned> 875d4e18bc39fea892f523d960213906d296d3cb38Hal FinkelMaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, 885d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel cl::desc("The maximum number of pairable instructions per group")); 895d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 905d4e18bc39fea892f523d960213906d296d3cb38Hal Finkelstatic cl::opt<unsigned> 91ab90084bca42b74a5b5edad9b416bd81e105dad0Hal FinkelMaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden, 92ab90084bca42b74a5b5edad9b416bd81e105dad0Hal Finkel cl::desc("The maximum number of candidate instruction pairs per group")); 93ab90084bca42b74a5b5edad9b416bd81e105dad0Hal Finkel 94ab90084bca42b74a5b5edad9b416bd81e105dad0Hal Finkelstatic cl::opt<unsigned> 95de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelMaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), 96de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use" 97de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " a full cycle check")); 98de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 99de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 100768edf3cd037aab10391abc279f71470df8e3156Hal FinkelNoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden, 101768edf3cd037aab10391abc279f71470df8e3156Hal Finkel cl::desc("Don't try to vectorize boolean (i1) values")); 102768edf3cd037aab10391abc279f71470df8e3156Hal 
Finkel 103768edf3cd037aab10391abc279f71470df8e3156Hal Finkelstatic cl::opt<bool> 104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden, 105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize integer values")); 106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden, 109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize floating-point values")); 110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 111822ab00847da841a63be4e3883cb5f442dc69069Hal Finkel// FIXME: This should default to false once pointer vector support works. 112de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 113822ab00847da841a63be4e3883cb5f442dc69069Hal FinkelNoPointers("bb-vectorize-no-pointers", cl::init(/*false*/ true), cl::Hidden, 114f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel cl::desc("Don't try to vectorize pointer values")); 115f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 116f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkelstatic cl::opt<bool> 117de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden, 118de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize casting (conversion) operations")); 119de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 120de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden, 122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize floating-point math intrinsics")); 123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 124de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic 
cl::opt<bool> 125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, 126de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize the fused-multiply-add intrinsic")); 127de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 128de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 129fc3665c87519850f629c9565535e3be447e10addHal FinkelNoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, 130fc3665c87519850f629c9565535e3be447e10addHal Finkel cl::desc("Don't try to vectorize select instructions")); 131fc3665c87519850f629c9565535e3be447e10addHal Finkel 132fc3665c87519850f629c9565535e3be447e10addHal Finkelstatic cl::opt<bool> 133e415f96b6a43ac8861148a11a4258bc38c247e8fHal FinkelNoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden, 134e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel cl::desc("Don't try to vectorize comparison instructions")); 135e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel 136e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkelstatic cl::opt<bool> 137f3f5a1e6f77a842ccb24cc81766437da5197d712Hal FinkelNoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, 138f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel cl::desc("Don't try to vectorize getelementptr instructions")); 139f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 140f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkelstatic cl::opt<bool> 141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelNoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, 142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Don't try to vectorize loads and stores")); 143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelAlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden, 146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Only generate 
aligned loads and stores")); 147de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 148de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 149edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal FinkelNoMemOpBoost("bb-vectorize-no-mem-op-boost", 150edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel cl::init(false), cl::Hidden, 151edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel cl::desc("Don't boost the chain-depth contribution of loads and stores")); 152edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel 153edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkelstatic cl::opt<bool> 154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelFastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden, 155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("Use a fast instruction dependency analysis")); 156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#ifndef NDEBUG 158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 159de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugInstructionExamination("bb-vectorize-debug-instruction-examination", 160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 162de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " instruction-examination process")); 163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 164de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCandidateSelection("bb-vectorize-debug-candidate-selection", 165de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 166de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " candidate-selection process")); 168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 
169de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugPairSelection("bb-vectorize-debug-pair-selection", 170de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 171de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " pair-selection process")); 173de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic cl::opt<bool> 174de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelDebugCycleCheck("bb-vectorize-debug-cycle-check", 175de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::init(false), cl::Hidden, 176de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cl::desc("When debugging is enabled, output information on the" 177de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " cycle-checking process")); 17872465ea23d010507d3746adc126d719005981e05Hal Finkel 17972465ea23d010507d3746adc126d719005981e05Hal Finkelstatic cl::opt<bool> 18072465ea23d010507d3746adc126d719005981e05Hal FinkelPrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair", 18172465ea23d010507d3746adc126d719005981e05Hal Finkel cl::init(false), cl::Hidden, 18272465ea23d010507d3746adc126d719005981e05Hal Finkel cl::desc("When debugging is enabled, dump the basic block after" 18372465ea23d010507d3746adc126d719005981e05Hal Finkel " every pair is fused")); 184de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel#endif 185de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 186de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelSTATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize"); 187de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 188de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelnamespace { 189de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel struct BBVectorize : public BasicBlockPass { 190de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel static char ID; // Pass identification, replacement for typeid 
191bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 192940371bc65570ec0add1ede4f4d9f0a41ba25e09Hongbin Zheng const VectorizeConfig Config; 193bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 194bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize(const VectorizeConfig &C = VectorizeConfig()) 195bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng : BasicBlockPass(ID), Config(C) { 196de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel initializeBBVectorizePass(*PassRegistry::getPassRegistry()); 197de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 199bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize(Pass *P, const VectorizeConfig &C) 200bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng : BasicBlockPass(ID), Config(C) { 20187825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng AA = &P->getAnalysis<AliasAnalysis>(); 202e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DT = &P->getAnalysis<DominatorTree>(); 20387825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng SE = &P->getAnalysis<ScalarEvolution>(); 2043574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow TD = P->getAnalysisIfAvailable<DataLayout>(); 2058bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler Carruth TTI = IgnoreTargetInfo ? 
0 : &P->getAnalysis<TargetTransformInfo>(); 20687825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng } 20787825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<Value *, Value *> ValuePair; 20965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel typedef std::pair<ValuePair, int> ValuePairWithCost; 210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<ValuePair, size_t> ValuePairWithDepth; 211de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair 21272465ea23d010507d3746adc126d719005981e05Hal Finkel typedef std::pair<VPPair, unsigned> VPPairWithType; 213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 214de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasAnalysis *AA; 215e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DominatorTree *DT; 216de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ScalarEvolution *SE; 2173574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow DataLayout *TD; 218abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth const TargetTransformInfo *TTI; 219de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 220de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: const correct? 
221de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 22264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false); 223de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2245d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool getCandidatePairs(BasicBlock &BB, 2255d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator &Start, 2266ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 227a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 22865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 22964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Value *> &PairableInsts, bool NonPow2Len); 230de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 23178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // FIXME: The current implementation does not account for pairs that 23278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // are connected in multiple ways. 
For example: 23378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap) 23472465ea23d010507d3746adc126d719005981e05Hal Finkel enum PairConnectionType { 23572465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionDirect, 23672465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionSwap, 23772465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionSplat 23872465ea23d010507d3746adc126d719005981e05Hal Finkel }; 23972465ea23d010507d3746adc126d719005981e05Hal Finkel 24097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel void computeConnectedPairs( 24197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 24297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 24397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 24497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 24597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes); 246de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 247de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void buildDepMap(BasicBlock &BB, 24897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 24997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 25097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &PairableInstUsers); 251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2526ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel void choosePairs(DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 25397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 25497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 
25597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 25697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &FixedOrderPairs, 25797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 25897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 25997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps, 26097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &PairableInstUsers, 26197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, Value *>& ChosenPairs); 262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void fuseChosenPairs(BasicBlock &BB, 26497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 26597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, Value *>& ChosenPairs, 26697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &FixedOrderPairs, 26797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 26897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 26997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps); 27072465ea23d010507d3746adc126d719005981e05Hal Finkel 271de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 272de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); 273de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 274de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool areInstsCompatible(Instruction *I, Instruction *J, 27565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel bool IsSimpleLoadStore, bool NonPow2Len, 
276a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel int &CostSavings, int &FixedOrder); 277de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool trackUsesOfI(DenseSet<Value *> &Users, 279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker &WriteSet, Instruction *I, 280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, bool UpdateUsers = true, 2812f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> *LoadMoveSetPairs = 0); 2821230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 28397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel void computePairsConnectedTo( 28497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 28597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 28697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 28797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 28897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 28997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ValuePair P); 290de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool pairsConflict(ValuePair P, ValuePair Q, 29297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &PairableInstUsers, 29397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > 29497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel *PairableInstUserMap = 0, 29597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<VPPair> *PairableInstUserPairSet = 0); 296de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 297de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool pairWillFormCycle(ValuePair P, 29897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel 
DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers, 29997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CurrentPairs); 300de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 301f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel void pruneDAGFor( 30297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 30397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 30497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 30597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &PairableInstUsers, 30697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, 30797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<VPPair> &PairableInstUserPairSet, 30897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, Value *> &ChosenPairs, 309f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseMap<ValuePair, size_t> &DAG, 310f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseSet<ValuePair> &PrunedDAG, ValuePair J, 31197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel bool UseCycleCheck); 312de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 313f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel void buildInitialDAGFor( 31497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 31597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 31697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 31797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 31897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &PairableInstUsers, 31997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, Value *> 
&ChosenPairs, 320f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseMap<ValuePair, size_t> &DAG, ValuePair J); 321de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 322f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel void findBestDAGFor( 32397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 32497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 32597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 32697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 32797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &FixedOrderPairs, 32897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 32997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 33097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps, 33197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &PairableInstUsers, 33297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, 33397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<VPPair> &PairableInstUserPairSet, 33497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, Value *> &ChosenPairs, 335f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth, 33697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel int &BestEffSize, Value *II, std::vector<Value *>&JJ, 33797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel bool UseCycleCheck); 338de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 339de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, 
340202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *J, unsigned o); 341de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 342de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void fillNewShuffleMask(LLVMContext& Context, Instruction *J, 34364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaskOffset, unsigned NumInElem, 34464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElem1, unsigned IdxOffset, 34564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> &Mask); 346de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I, 348de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J); 349de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 35064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J, 35164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned o, Value *&LOp, unsigned numElemL, 35272465ea23d010507d3746adc126d719005981e05Hal Finkel Type *ArgTypeL, Type *ArgTypeR, bool IBeforeJ, 35364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned IdxOff = 0); 35464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 355de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *getReplacementInput(LLVMContext& Context, Instruction *I, 35672465ea23d010507d3746adc126d719005981e05Hal Finkel Instruction *J, unsigned o, bool IBeforeJ); 357de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 358de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, 359a0ec3f9b7b826b9b40b80199923b664bad808cceCraig Topper Instruction *J, SmallVectorImpl<Value *> &ReplacedOperands, 36072465ea23d010507d3746adc126d719005981e05Hal Finkel bool IBeforeJ); 361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void 
replaceOutputsOfPair(LLVMContext& Context, Instruction *I, 363de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, Instruction *K, 364de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, Instruction *&K1, 365202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *&K2); 366de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void collectPairLoadMoveSet(BasicBlock &BB, 368de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 36997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &LoadMoveSet, 3702f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 371de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I); 372de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void collectLoadMoveSet(BasicBlock &BB, 374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 375de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 37697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &LoadMoveSet, 3772f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs); 378de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 379de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool canMoveUsesOfIAfterJ(BasicBlock &BB, 3802f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 381de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J); 382de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 383de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void moveUsesOfIAfterJ(BasicBlock &BB, 3842f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 385de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 
Instruction *&InsertionPt, 386de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J); 387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 388ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel void combineMetadata(Instruction *K, const Instruction *J); 389ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 39087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng bool vectorizeBB(BasicBlock &BB) { 391e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel if (!DT->isReachableFromEntry(&BB)) { 392e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DEBUG(dbgs() << "BBV: skipping unreachable " << BB.getName() << 393e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel " in " << BB.getParent()->getName() << "\n"); 394e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel return false; 395e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel } 396e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel 397abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth DEBUG(if (TTI) dbgs() << "BBV: using target information\n"); 39865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 399de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool changed = false; 400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate a sufficient number of times to merge types of size 1 bit, 401de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // then 2 bits, then 4, etc. up to half of the target vector width of the 402de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // target vector register. 
40364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned n = 1; 40464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 2; 405abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth (TTI || v <= Config.VectorBits) && 40665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel (!Config.MaxIter || n <= Config.MaxIter); 407de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel v *= 2, ++n) { 408bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng DEBUG(dbgs() << "BBV: fusing loop #" << n << 409de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " for " << BB.getName() << " in " << 410de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BB.getParent()->getName() << "...\n"); 411de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (vectorizePairs(BB)) 412de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel changed = true; 413de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 414de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 415de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 416de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 41764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (changed && !Pow2LenOnly) { 41864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ++n; 41964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; !Config.MaxIter || n <= Config.MaxIter; ++n) { 42064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " << 42164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel n << " for " << BB.getName() << " in " << 42264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel BB.getParent()->getName() << "...\n"); 42364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!vectorizePairs(BB, true)) break; 42464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 42564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 42664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 427de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: done!\n"); 
428de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return changed; 429de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 430de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 43187825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng virtual bool runOnBasicBlock(BasicBlock &BB) { 43287825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng AA = &getAnalysis<AliasAnalysis>(); 433e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel DT = &getAnalysis<DominatorTree>(); 43487825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng SE = &getAnalysis<ScalarEvolution>(); 4353574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow TD = getAnalysisIfAvailable<DataLayout>(); 4368bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler Carruth TTI = IgnoreTargetInfo ? 0 : &getAnalysis<TargetTransformInfo>(); 43787825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 43887825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng return vectorizeBB(BB); 43987825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng } 44087825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng 441de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel virtual void getAnalysisUsage(AnalysisUsage &AU) const { 442de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlockPass::getAnalysisUsage(AU); 443de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addRequired<AliasAnalysis>(); 444e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel AU.addRequired<DominatorTree>(); 445de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addRequired<ScalarEvolution>(); 4468bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler Carruth AU.addRequired<TargetTransformInfo>(); 447de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addPreserved<AliasAnalysis>(); 448e29c19091cca58db668407dfc5dd86c70e8b3d49Hal Finkel AU.addPreserved<DominatorTree>(); 449de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AU.addPreserved<ScalarEvolution>(); 4507e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel AU.setPreservesCFG(); 451de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal 
Finkel } 452de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 45364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) { 45464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() && 45564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel "Cannot form vector from incompatible scalar types"); 45664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *STy = ElemTy->getScalarType(); 45764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 45864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem; 459de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) { 46064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem = VTy->getNumElements(); 46164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 46264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem = 1; 46364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 46464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 46564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) { 46664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem += VTy->getNumElements(); 46764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 46864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElem += 1; 469de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 4707e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel 47164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return VectorType::get(STy, numElem); 47264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 47364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 47464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel static inline void getInstructionTypes(Instruction *I, 47564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *&T1, Type *&T2) { 4763fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 
47764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // For stores, it is the value type, not the pointer type that matters 47864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // because the value is what will come from a vector register. 47964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 4803fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel Value *IVal = SI->getValueOperand(); 48164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T1 = IVal->getType(); 48264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 48364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T1 = I->getType(); 48464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 48564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 4863fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel if (CastInst *CI = dyn_cast<CastInst>(I)) 4873fc1e4aa159ec15058bb26acbec39f6e09990207Hal Finkel T2 = CI->getSrcTy(); 48864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 48964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel T2 = T1; 49065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 49165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 49265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel T2 = SI->getCondition()->getType(); 4938b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) { 4948b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel T2 = SI->getOperand(0)->getType(); 4955094257518ea7b615d87ef5bea657625ffa81991Hal Finkel } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) { 4965094257518ea7b615d87ef5bea657625ffa81991Hal Finkel T2 = CI->getOperand(0)->getType(); 49765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 498de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 499de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 500de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the weight associated with the provided value. 
A chain of 501de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // candidate pairs has a length given by the sum of the weights of its 502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // members (one weight per pair; the weight of each member of the pair 503de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is assumed to be the same). This length is then compared to the 504de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // chain-length threshold to determine if a given chain is significant 505de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // enough to be vectorized. The length is also used in comparing 506de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // candidate chains where longer chains are considered to be better. 507de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: when this function returns 0, the resulting instructions are 508de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // not actually fused. 509bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng inline size_t getDepthFactor(Value *V) { 510de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // InsertElement and ExtractElement have a depth factor of zero. This is 511de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // for two reasons: First, they cannot be usefully fused. Second, because 512de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the pass generates a lot of these, they can confuse the simple metric 513f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // used to compare the dags in the next iteration. Thus, giving them a 514de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // weight of zero allows the pass to essentially ignore them in 515de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // subsequent iterations when looking for vectorization opportunities 516de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // while still tracking dependency chains that flow through those 517de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instructions. 
518de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V)) 519de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return 0; 520de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 521edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel // Give a load or store half of the required depth so that load/store 522edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel // pairs will vectorize. 523bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V))) 524bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng return Config.ReqChainDepth/2; 525edc8db87dc2ed4d2971e7f50464f5f4d0fead537Hal Finkel 526de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return 1; 527de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 528de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 529abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth // Returns the cost of the provided instruction using TTI. 53046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // This does not handle loads and stores. 53146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) { 53246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel switch (Opcode) { 53346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel default: break; 53446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::GetElementPtr: 53546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // We mark this instruction as zero-cost because scalar GEPs are usually 53646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // lowered to the intruction addressing mode. At the moment we don't 53746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel // generate vector GEPs. 
53846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return 0; 53946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Br: 540abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth return TTI->getCFInstrCost(Opcode); 54146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::PHI: 54246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return 0; 54346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Add: 54446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FAdd: 54546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Sub: 54646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FSub: 54746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Mul: 54846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FMul: 54946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::UDiv: 55046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SDiv: 55146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FDiv: 55246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::URem: 55346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SRem: 55446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FRem: 55546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Shl: 55646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::LShr: 55746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::AShr: 55846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::And: 55946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Or: 56046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Xor: 561abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth return TTI->getArithmeticInstrCost(Opcode, T1); 56246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Select: 
56346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::ICmp: 56446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FCmp: 565abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth return TTI->getCmpSelInstrCost(Opcode, T1, T2); 56646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::ZExt: 56746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SExt: 56846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPToUI: 56946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPToSI: 57046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPExt: 57146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::PtrToInt: 57246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::IntToPtr: 57346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::SIToFP: 57446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::UIToFP: 57546fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::Trunc: 57646fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::FPTrunc: 57746fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel case Instruction::BitCast: 57886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel case Instruction::ShuffleVector: 579abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth return TTI->getCastInstrCost(Opcode, T1, T2); 58046fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel } 58146fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel 58246fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel return 1; 58346fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel } 58446fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel 585de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This determines the relative offset of two loads or stores, returning 586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // true if the offset could be determined to be some constant value. 
587de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For example, if OffsetInElmts == 1, then J accesses the memory directly 588de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // after I; if OffsetInElmts == -1 then I accesses the memory 58964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // directly after J. 590de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool getPairPtrInfo(Instruction *I, Instruction *J, 591de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment, 59265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned &IAddressSpace, unsigned &JAddressSpace, 59393f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel int64_t &OffsetInElmts, bool ComputeOffset = true) { 594de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts = 0; 59565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 59665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel LoadInst *LJ = cast<LoadInst>(J); 59765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IPtr = LI->getPointerOperand(); 59865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JPtr = LJ->getPointerOperand(); 59965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAlignment = LI->getAlignment(); 60065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAlignment = LJ->getAlignment(); 60165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace = LI->getPointerAddressSpace(); 60265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAddressSpace = LJ->getPointerAddressSpace(); 603de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 60465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J); 60565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IPtr = SI->getPointerOperand(); 60665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JPtr = SJ->getPointerOperand(); 60765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAlignment = 
SI->getAlignment(); 60865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAlignment = SJ->getAlignment(); 60965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace = SI->getPointerAddressSpace(); 61065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel JAddressSpace = SJ->getPointerAddressSpace(); 611de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 612de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 61393f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel if (!ComputeOffset) 61493f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel return true; 61593f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel 616de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *IPtrSCEV = SE->getSCEV(IPtr); 617de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *JPtrSCEV = SE->getSCEV(JPtr); 618de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 619de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If this is a trivial offset, then we'll get something like 620de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // 1*sizeof(type). With target data, which we need anyway, this will get 621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // constant folded into a number. 
622de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV); 623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (const SCEVConstant *ConstOffSCEV = 624de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dyn_cast<SCEVConstant>(OffsetSCEV)) { 625de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantInt *IntOff = ConstOffSCEV->getValue(); 626de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t Offset = IntOff->getSExtValue(); 627de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 628de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VTy = cast<PointerType>(IPtr->getType())->getElementType(); 629de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy); 630de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 63164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType(); 63264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (VTy != VTy2 && Offset < 0) { 63364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2); 63464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel OffsetInElmts = Offset/VTy2TSS; 63564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return (abs64(Offset) % VTy2TSS) == 0; 63664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 638de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts = Offset/VTyTSS; 639de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (abs64(Offset) % VTyTSS) == 0; 640de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 641de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 642de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 643de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 644de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 645de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if the provided 
CallInst represents an intrinsic that can 646de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // be vectorized. 647de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool isVectorizableIntrinsic(CallInst* I) { 648de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Function *F = I->getCalledFunction(); 649de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!F) return false; 650de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 651a77728415857196035c0090f7b2749d7971811a2Hal Finkel Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID(); 652de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!IID) return false; 653de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 654de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel switch(IID) { 655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel default: 656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 657de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::sqrt: 658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::powi: 659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::sin: 660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::cos: 661de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log: 662de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log2: 663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::log10: 664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::exp: 665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::exp2: 666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::pow: 66786312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng return Config.VectorizeMath; 668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel case Intrinsic::fma: 66964a7a24edf719bb6ffacc030c23f4cd99312f3fbHal Finkel case Intrinsic::fmuladd: 67086312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng return Config.VectorizeFMA; 
671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 672de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 673de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 674b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel bool isPureIEChain(InsertElementInst *IE) { 675b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel InsertElementInst *IENext = IE; 676b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel do { 677b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel if (!isa<UndefValue>(IENext->getOperand(0)) && 678b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel !isa<InsertElementInst>(IENext->getOperand(0))) { 679b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel return false; 680b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel } 681b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel } while ((IENext = 682b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel dyn_cast<InsertElementInst>(IENext->getOperand(0)))); 683b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel 684b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel return true; 685b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel } 686de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel }; 687de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 688de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function implements one vectorization iteration on the provided 689de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // basic block. It returns true if the block is changed. 
69064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) { 6915d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool ShouldContinue; 6925d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator Start = BB.getFirstInsertionPt(); 6935d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 6945d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::vector<Value *> AllPairableInsts; 6955d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseMap<Value *, Value *> AllChosenPairs; 696a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> AllFixedOrderPairs; 69772465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> AllPairConnectionTypes; 69897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > AllConnectedPairs, 69997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel AllConnectedPairDeps; 7005d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 7015d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel do { 7025d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel std::vector<Value *> PairableInsts; 7036ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel DenseMap<Value *, std::vector<Value *> > CandidatePairs; 704a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> FixedOrderPairs; 70565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> CandidatePairCostSavings; 7065d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, 707a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrderPairs, 70865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CandidatePairCostSavings, 70964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel PairableInsts, NonPow2Len); 7105d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (PairableInsts.empty()) continue; 7113706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 712b1a82589339fed148c12b052d30861a539552f1aHal 
Finkel // Build the candidate pair set for faster lookups. 713b1a82589339fed148c12b052d30861a539552f1aHal Finkel DenseSet<ValuePair> CandidatePairsSet; 7146ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel for (DenseMap<Value *, std::vector<Value *> >::iterator I = 7156ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel CandidatePairs.begin(), E = CandidatePairs.end(); I != E; ++I) 7166ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel for (std::vector<Value *>::iterator J = I->second.begin(), 7176ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel JE = I->second.end(); J != JE; ++J) 7186ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel CandidatePairsSet.insert(ValuePair(I->first, *J)); 719b1a82589339fed148c12b052d30861a539552f1aHal Finkel 7205d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Now we have a map of all of the pairable instructions and we need to 7215d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // select the best possible pairing. A good pairing is one such that the 7225d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // users of the pair are also paired. This defines a (directed) forest 72394c22716d60ff5edf6a98a3c67e0faa001be1142Sylvestre Ledru // over the pairs such that two pairs are connected iff the second pair 7245d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // uses the first. 7253706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 7265d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Note that it only matters that both members of the second pair use some 7275d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // element of the first pair (to allow for splatting). 
7283706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 72997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > ConnectedPairs, 73097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairDeps; 73172465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned> PairConnectionTypes; 732b1a82589339fed148c12b052d30861a539552f1aHal Finkel computeConnectedPairs(CandidatePairs, CandidatePairsSet, 733b1a82589339fed148c12b052d30861a539552f1aHal Finkel PairableInsts, ConnectedPairs, PairConnectionTypes); 7345d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ConnectedPairs.empty()) continue; 7353706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 73697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator 73772465ea23d010507d3746adc126d719005981e05Hal Finkel I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); 73897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel I != IE; ++I) 73997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (std::vector<ValuePair>::iterator J = I->second.begin(), 74097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel JE = I->second.end(); J != JE; ++J) 74197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairDeps[*J].push_back(I->first); 74272465ea23d010507d3746adc126d719005981e05Hal Finkel 7435d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // Build the pairable-instruction dependency map 7445d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseSet<ValuePair> PairableInstUsers; 7455d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers); 7463706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 74735564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // There is now a graph of the connected pairs. 
For each variable, pick 748f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // the pairing with the largest dag meeting the depth requirement on at 749f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // least one branch. Then select all pairings that are part of that dag 75035564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // and remove them from the list of available pairings and pairable 75135564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel // variables. 7523706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 7535d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel DenseMap<Value *, Value *> ChosenPairs; 754b1a82589339fed148c12b052d30861a539552f1aHal Finkel choosePairs(CandidatePairs, CandidatePairsSet, 755b1a82589339fed148c12b052d30861a539552f1aHal Finkel CandidatePairCostSavings, 75686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel PairableInsts, FixedOrderPairs, PairConnectionTypes, 75786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ConnectedPairs, ConnectedPairDeps, 7585d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel PairableInstUsers, ChosenPairs); 7593706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 7605d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ChosenPairs.empty()) continue; 7615d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(), 7625d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel PairableInsts.end()); 7635d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end()); 764a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel 76572465ea23d010507d3746adc126d719005981e05Hal Finkel // Only for the chosen pairs, propagate information on fixed-order pairs, 76672465ea23d010507d3746adc126d719005981e05Hal Finkel // pair connections, and their types to the data structures used by the 76772465ea23d010507d3746adc126d719005981e05Hal Finkel // pair fusion procedures. 
768a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel for (DenseMap<Value *, Value *>::iterator I = ChosenPairs.begin(), 769a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel IE = ChosenPairs.end(); I != IE; ++I) { 770a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel if (FixedOrderPairs.count(*I)) 771a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel AllFixedOrderPairs.insert(*I); 772a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel else if (FixedOrderPairs.count(ValuePair(I->second, I->first))) 773a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel AllFixedOrderPairs.insert(ValuePair(I->second, I->first)); 77472465ea23d010507d3746adc126d719005981e05Hal Finkel 77572465ea23d010507d3746adc126d719005981e05Hal Finkel for (DenseMap<Value *, Value *>::iterator J = ChosenPairs.begin(); 77672465ea23d010507d3746adc126d719005981e05Hal Finkel J != IE; ++J) { 77772465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned>::iterator K = 77872465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.find(VPPair(*I, *J)); 77972465ea23d010507d3746adc126d719005981e05Hal Finkel if (K != PairConnectionTypes.end()) { 78072465ea23d010507d3746adc126d719005981e05Hal Finkel AllPairConnectionTypes.insert(*K); 78172465ea23d010507d3746adc126d719005981e05Hal Finkel } else { 78272465ea23d010507d3746adc126d719005981e05Hal Finkel K = PairConnectionTypes.find(VPPair(*J, *I)); 78372465ea23d010507d3746adc126d719005981e05Hal Finkel if (K != PairConnectionTypes.end()) 78472465ea23d010507d3746adc126d719005981e05Hal Finkel AllPairConnectionTypes.insert(*K); 78572465ea23d010507d3746adc126d719005981e05Hal Finkel } 78672465ea23d010507d3746adc126d719005981e05Hal Finkel } 78772465ea23d010507d3746adc126d719005981e05Hal Finkel } 78872465ea23d010507d3746adc126d719005981e05Hal Finkel 78997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator 79072465ea23d010507d3746adc126d719005981e05Hal Finkel I = ConnectedPairs.begin(), IE = 
ConnectedPairs.end(); 79197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel I != IE; ++I) 79297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (std::vector<ValuePair>::iterator J = I->second.begin(), 79397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel JE = I->second.end(); J != JE; ++J) 79497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (AllPairConnectionTypes.count(VPPair(I->first, *J))) { 79597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel AllConnectedPairs[I->first].push_back(*J); 79697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel AllConnectedPairDeps[*J].push_back(I->first); 79797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel } 7985d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } while (ShouldContinue); 7995d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 8005d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (AllChosenPairs.empty()) return false; 8015d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel NumFusedOps += AllChosenPairs.size(); 8023706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 803de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A set of pairs has now been selected. It is now necessary to replace the 804de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // paired instructions with vector instructions. For this procedure each 80543ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop // operand must be replaced with a vector operand. This vector is formed 806de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // by using build_vector on the old operands. The replaced values are then 807de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // replaced with a vector_extract on the result. Subsequent optimization 808de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // passes should coalesce the build/extract combinations. 
8093706ac7aa83ab0aed9e2da7d5fc2386ac1f035f5Sebastian Pop 81072465ea23d010507d3746adc126d719005981e05Hal Finkel fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs, 81172465ea23d010507d3746adc126d719005981e05Hal Finkel AllPairConnectionTypes, 81272465ea23d010507d3746adc126d719005981e05Hal Finkel AllConnectedPairs, AllConnectedPairDeps); 81364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 81464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // It is important to cleanup here so that future iterations of this 81564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // function have less work to do. 8168e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo()); 817de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 818de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 819de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 820de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function returns true if the provided instruction is capable of being 821de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // fused into a vector instruction. This determination is based only on the 822de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // type and other attributes of the instruction. 
823de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::isInstVectorizable(Instruction *I, 824de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool &IsSimpleLoadStore) { 825de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = false; 826de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 827de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (CallInst *C = dyn_cast<CallInst>(I)) { 828de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isVectorizableIntrinsic(C)) 829de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 830de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (LoadInst *L = dyn_cast<LoadInst>(I)) { 831de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Vectorize simple loads if possbile: 832de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = L->isSimple(); 83386312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!IsSimpleLoadStore || !Config.VectorizeMemOps) 834de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 835de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (StoreInst *S = dyn_cast<StoreInst>(I)) { 836de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Vectorize simple stores if possbile: 837de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsSimpleLoadStore = S->isSimple(); 83886312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!IsSimpleLoadStore || !Config.VectorizeMemOps) 839de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 840de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (CastInst *C = dyn_cast<CastInst>(I)) { 841de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // We can vectorize casts, but not casts of pointer types, etc. 
84286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!Config.VectorizeCasts) 843de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 844de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 845de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *SrcTy = C->getSrcTy(); 846f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!SrcTy->isSingleValueType()) 847de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 848de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 849de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *DestTy = C->getDestTy(); 850f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!DestTy->isSingleValueType()) 851de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 852fc3665c87519850f629c9565535e3be447e10addHal Finkel } else if (isa<SelectInst>(I)) { 853fc3665c87519850f629c9565535e3be447e10addHal Finkel if (!Config.VectorizeSelect) 854fc3665c87519850f629c9565535e3be447e10addHal Finkel return false; 855e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel } else if (isa<CmpInst>(I)) { 856e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel if (!Config.VectorizeCmp) 857e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel return false; 858f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) { 859f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (!Config.VectorizeGEP) 860f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 861f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 862f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel // Currently, vector GEPs exist only with one index. 
863f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel if (G->getNumIndices() != 1) 864f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 865de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) || 866de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) { 867de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 868de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 869de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 870de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // We can't vectorize memory operations without target data 871de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (TD == 0 && IsSimpleLoadStore) 872de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 873de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 874de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *T1, *T2; 87564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(I, T1, T2); 876de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 877de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Not every type can be vectorized... 
878de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) || 879de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel !(VectorType::isValidElementType(T2) || T2->isVectorTy())) 880de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 881de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 88265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (T1->getScalarSizeInBits() == 1) { 883768edf3cd037aab10391abc279f71470df8e3156Hal Finkel if (!Config.VectorizeBools) 884768edf3cd037aab10391abc279f71470df8e3156Hal Finkel return false; 885768edf3cd037aab10391abc279f71470df8e3156Hal Finkel } else { 88665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!Config.VectorizeInts && T1->isIntOrIntVectorTy()) 887768edf3cd037aab10391abc279f71470df8e3156Hal Finkel return false; 888768edf3cd037aab10391abc279f71470df8e3156Hal Finkel } 88965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 89065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (T2->getScalarSizeInBits() == 1) { 89165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!Config.VectorizeBools) 89265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 89365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } else { 89465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!Config.VectorizeInts && T2->isIntOrIntVectorTy()) 89565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 89665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 89765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 89886312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng if (!Config.VectorizeFloats 89986312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) 900de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 901de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 902e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel // Don't vectorize target-specific types. 
903e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel if (T1->isX86_FP80Ty() || T1->isPPC_FP128Ty() || T1->isX86_MMXTy()) 904e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel return false; 905e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy()) 906e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel return false; 907e32e5440d6aaff8a77517e9d286846ae9e380770Hal Finkel 90805bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel if ((!Config.VectorizePointers || TD == 0) && 90905bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel (T1->getScalarType()->isPointerTy() || 91005bc5087a25bbcf59936d71ebfc878b545ef3e5cHal Finkel T2->getScalarType()->isPointerTy())) 911f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel return false; 912f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel 913abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits || 914abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth T2->getPrimitiveSizeInBits() >= Config.VectorBits)) 915de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 916de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 917de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 918de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 919de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 920de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function returns true if the two provided instructions are compatible 921de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // (meaning that they can be fused into a vector instruction). This assumes 922de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that I has already been determined to be vectorizable and that J is not 923f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // in the use dag of I. 
924de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, 92565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel bool IsSimpleLoadStore, bool NonPow2Len, 926a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel int &CostSavings, int &FixedOrder) { 927de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << 928de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << "\n"); 929de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 93065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings = 0; 931a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrder = 0; 93265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 933de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Loads and stores can be merged if they have different alignments, 934de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but are otherwise the same. 93564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment | 93664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel (NonPow2Len ? 
Instruction::CompareUsingScalarTypes : 0))) 93764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return false; 93864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 93964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *IT1, *IT2, *JT1, *JT2; 94064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(I, IT1, IT2); 94164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getInstructionTypes(J, JT1, JT2); 94264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaxTypeBits = std::max( 94364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(), 94464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits()); 945abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (!TTI && MaxTypeBits > Config.VectorBits) 946de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 947ec4e85e3364f50802f2007e4b1e23661d4610366Hal Finkel 948de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: handle addsub-type operations! 
949de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 950de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsSimpleLoadStore) { 951de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *IPtr, *JPtr; 95265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; 953de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t OffsetInElmts = 0; 954de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, 95565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace, JAddressSpace, 956de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel OffsetInElmts) && abs64(OffsetInElmts) == 1) { 957a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrder = (int) OffsetInElmts; 95865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned BottomAlignment = IAlignment; 95965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (OffsetInElmts < 0) BottomAlignment = JAlignment; 96065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 96165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *aTypeI = isa<StoreInst>(I) ? 96265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel cast<StoreInst>(I)->getValueOperand()->getType() : I->getType(); 96365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *aTypeJ = isa<StoreInst>(J) ? 
96465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel cast<StoreInst>(J)->getValueOperand()->getType() : J->getType(); 96565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *VType = getVecTypeForPair(aTypeI, aTypeJ); 96664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 96765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (Config.AlignedOnly) { 968de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // An aligned load or store is possible only if the instruction 969de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // with the lower offset has an alignment suitable for the 970de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vector type. 9711230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop 972de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned VecAlignment = TD->getPrefTypeAlignment(VType); 973de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (BottomAlignment < VecAlignment) 974de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 975de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 97665309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 977abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (TTI) { 978abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned ICost = TTI->getMemoryOpCost(I->getOpcode(), aTypeI, 979abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth IAlignment, IAddressSpace); 980abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned JCost = TTI->getMemoryOpCost(J->getOpcode(), aTypeJ, 981abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth JAlignment, JAddressSpace); 982abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType, 983abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth BottomAlignment, 984abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth IAddressSpace); 9850cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel 9860cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel ICost += 
TTI->getAddressComputationCost(aTypeI); 9870cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel JCost += TTI->getAddressComputationCost(aTypeJ); 9880cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel VCost += TTI->getAddressComputationCost(VType); 9890cf5d396c14c71dd4fa1d102c2b3d178b1191436Hal Finkel 99065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (VCost > ICost + JCost) 99165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 99282149a9106f221aa6a7271977c236b078e621f21Hal Finkel 993dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // We don't want to fuse to a type that will be split, even 99482149a9106f221aa6a7271977c236b078e621f21Hal Finkel // if the two input types will also be split and there is no other 995dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // associated cost. 996abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned VParts = TTI->getNumberOfParts(VType); 997dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel if (VParts > 1) 998dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel return false; 999dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel else if (!VParts && VCost == ICost + JCost) 100082149a9106f221aa6a7271977c236b078e621f21Hal Finkel return false; 100182149a9106f221aa6a7271977c236b078e621f21Hal Finkel 100265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings = ICost + JCost - VCost; 100365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 1004de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1005de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 1006de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1007abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth } else if (TTI) { 100846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2); 100946fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2); 101065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel Type *VT1 = 
getVecTypeForPair(IT1, JT1), 101165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel *VT2 = getVecTypeForPair(IT2, JT2); 1012ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel 1013ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel // Note that this procedure is incorrect for insert and extract element 1014ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel // instructions (because combining these often results in a shuffle), 1015ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel // but this cost is ignored (because insert and extract element 1016ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel // instructions are assigned a zero depth factor and are not really 1017ec5c3c2bd37828e9fbd913f5ac7b7f75711ddd59Hal Finkel // fused in general). 101846fb81cf4009cc34af97c5a1c0e824e2633fb4e4Hal Finkel unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2); 101965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel 102065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (VCost > ICost + JCost) 102165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel return false; 102282149a9106f221aa6a7271977c236b078e621f21Hal Finkel 1023dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // We don't want to fuse to a type that will be split, even 102482149a9106f221aa6a7271977c236b078e621f21Hal Finkel // if the two input types will also be split and there is no other 1025dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel // associated cost. 
1026abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned VParts1 = TTI->getNumberOfParts(VT1), 1027abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth VParts2 = TTI->getNumberOfParts(VT2); 10288b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel if (VParts1 > 1 || VParts2 > 1) 1029dc330f75b732b4ce1beace69ae7ed8e19d89bd9fHal Finkel return false; 10308b9796f4f83bea2bbefcd2822eb574abdb7f3d1bHal Finkel else if ((!VParts1 || !VParts2) && VCost == ICost + JCost) 103182149a9106f221aa6a7271977c236b078e621f21Hal Finkel return false; 103282149a9106f221aa6a7271977c236b078e621f21Hal Finkel 103365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings = ICost + JCost - VCost; 1034de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1035de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 10366173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // The powi intrinsic is special because only the first argument is 10376173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // vectorized, the second arguments must be equal. 
10386173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel CallInst *CI = dyn_cast<CallInst>(I); 10396173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Function *FI; 1040a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (CI && (FI = CI->getCalledFunction())) { 1041a77728415857196035c0090f7b2749d7971811a2Hal Finkel Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID(); 1042a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (IID == Intrinsic::powi) { 1043a77728415857196035c0090f7b2749d7971811a2Hal Finkel Value *A1I = CI->getArgOperand(1), 1044a77728415857196035c0090f7b2749d7971811a2Hal Finkel *A1J = cast<CallInst>(J)->getArgOperand(1); 1045a77728415857196035c0090f7b2749d7971811a2Hal Finkel const SCEV *A1ISCEV = SE->getSCEV(A1I), 1046a77728415857196035c0090f7b2749d7971811a2Hal Finkel *A1JSCEV = SE->getSCEV(A1J); 1047a77728415857196035c0090f7b2749d7971811a2Hal Finkel return (A1ISCEV == A1JSCEV); 1048a77728415857196035c0090f7b2749d7971811a2Hal Finkel } 1049a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1050abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (IID && TTI) { 1051a77728415857196035c0090f7b2749d7971811a2Hal Finkel SmallVector<Type*, 4> Tys; 1052a77728415857196035c0090f7b2749d7971811a2Hal Finkel for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) 1053a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.push_back(CI->getArgOperand(i)->getType()); 1054abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys); 1055a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1056a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.clear(); 1057a77728415857196035c0090f7b2749d7971811a2Hal Finkel CallInst *CJ = cast<CallInst>(J); 1058a77728415857196035c0090f7b2749d7971811a2Hal Finkel for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i) 1059a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.push_back(CJ->getArgOperand(i)->getType()); 
1060abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys); 1061a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1062a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.clear(); 1063a77728415857196035c0090f7b2749d7971811a2Hal Finkel assert(CI->getNumArgOperands() == CJ->getNumArgOperands() && 1064a77728415857196035c0090f7b2749d7971811a2Hal Finkel "Intrinsic argument counts differ"); 1065a77728415857196035c0090f7b2749d7971811a2Hal Finkel for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { 1066a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (IID == Intrinsic::powi && i == 1) 1067a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.push_back(CI->getArgOperand(i)->getType()); 1068a77728415857196035c0090f7b2749d7971811a2Hal Finkel else 1069a77728415857196035c0090f7b2749d7971811a2Hal Finkel Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(), 1070a77728415857196035c0090f7b2749d7971811a2Hal Finkel CJ->getArgOperand(i)->getType())); 1071a77728415857196035c0090f7b2749d7971811a2Hal Finkel } 1072a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1073a77728415857196035c0090f7b2749d7971811a2Hal Finkel Type *RetTy = getVecTypeForPair(IT1, JT1); 1074abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys); 1075a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1076a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (VCost > ICost + JCost) 1077a77728415857196035c0090f7b2749d7971811a2Hal Finkel return false; 1078a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1079a77728415857196035c0090f7b2749d7971811a2Hal Finkel // We don't want to fuse to a type that will be split, even 1080a77728415857196035c0090f7b2749d7971811a2Hal Finkel // if the two input types will also be split and there is no other 1081a77728415857196035c0090f7b2749d7971811a2Hal Finkel // associated cost. 
1082abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned RetParts = TTI->getNumberOfParts(RetTy); 1083a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (RetParts > 1) 1084a77728415857196035c0090f7b2749d7971811a2Hal Finkel return false; 1085a77728415857196035c0090f7b2749d7971811a2Hal Finkel else if (!RetParts && VCost == ICost + JCost) 1086a77728415857196035c0090f7b2749d7971811a2Hal Finkel return false; 1087a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1088a77728415857196035c0090f7b2749d7971811a2Hal Finkel for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { 1089a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (!Tys[i]->isVectorTy()) 1090a77728415857196035c0090f7b2749d7971811a2Hal Finkel continue; 1091a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1092abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth unsigned NumParts = TTI->getNumberOfParts(Tys[i]); 1093a77728415857196035c0090f7b2749d7971811a2Hal Finkel if (NumParts > 1) 1094a77728415857196035c0090f7b2749d7971811a2Hal Finkel return false; 1095a77728415857196035c0090f7b2749d7971811a2Hal Finkel else if (!NumParts && VCost == ICost + JCost) 1096a77728415857196035c0090f7b2749d7971811a2Hal Finkel return false; 1097a77728415857196035c0090f7b2749d7971811a2Hal Finkel } 1098a77728415857196035c0090f7b2749d7971811a2Hal Finkel 1099a77728415857196035c0090f7b2749d7971811a2Hal Finkel CostSavings = ICost + JCost - VCost; 1100a77728415857196035c0090f7b2749d7971811a2Hal Finkel } 11016173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } 11026173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel 1103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 1104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Figure out whether or not J uses I and update the users and write-set 1107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // structures associated with I. 
// Specifically, Users represents the set of
// instructions that depend on I. WriteSet represents the set
// of memory locations that are dependent on I. If UpdateUsers is true,
// and J uses I, then Users is updated to contain J and WriteSet is updated
// to contain any memory locations to which J writes. The function returns
// true if J uses I. By default, alias analysis is used to determine
// whether J reads from memory that overlaps with a location in WriteSet.
// If LoadMoveSetPairs is not null, then it is a previously-computed set of
// pairs in which the first element is the memory-based user instruction and
// the second element is the instruction to be compared with I; J is treated
// as using I when the pair (J, I) is present. So, if LoadMoveSetPairs is
// provided, then the alias analysis is not used. This is necessary because
// this function is called during the process of moving instructions during
// vectorization and the results of the alias analysis are not stable during
// that process.
1121de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users, 1122de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker &WriteSet, Instruction *I, 1123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, bool UpdateUsers, 11242f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> *LoadMoveSetPairs) { 1125de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool UsesI = false; 1126de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1127de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This instruction may already be marked as a user due, for example, to 1128de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // being a member of a selected pair. 1129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (Users.count(J)) 1130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = true; 1131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!UsesI) 11337e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel for (User::op_iterator JU = J->op_begin(), JE = J->op_end(); 11347e004d177fe76145f75a9417ed2e281f1b9abaf7Hal Finkel JU != JE; ++JU) { 1135de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *V = *JU; 1136de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I == V || Users.count(V)) { 1137de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UsesI = true; 1138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1140de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1141de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!UsesI && J->mayReadFromMemory()) { 11422f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel if (LoadMoveSetPairs) { 11432f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel UsesI = LoadMoveSetPairs->count(ValuePair(J, I)); 1144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal 
Finkel for (AliasSetTracker::iterator W = WriteSet.begin(), 1146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel WE = WriteSet.end(); W != WE; ++W) { 114738a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel if (W->aliasesUnknownInst(J, *AA)) { 114838a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel UsesI = true; 114938a7f22445b8782682d1f8f253454ea0390d4ac5Hal Finkel break; 1150de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1153de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI && UpdateUsers) { 1156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (J->mayWriteToMemory()) WriteSet.add(J); 1157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Users.insert(J); 1158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1159de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return UsesI; 1161de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1162de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1163de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function iterates over all instruction pairs in the provided 1164de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // basic block and collects all candidate pairs for vectorization. 
11655d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool BBVectorize::getCandidatePairs(BasicBlock &BB, 11665d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator &Start, 11676ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 1168a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel DenseSet<ValuePair> &FixedOrderPairs, 116965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 117064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Value *> &PairableInsts, bool NonPow2Len) { 1171ab90084bca42b74a5b5edad9b416bd81e105dad0Hal Finkel size_t TotalPairs = 0; 1172de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock::iterator E = BB.end(); 11735d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (Start == E) return false; 11745d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 11755d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool ShouldContinue = false, IAfterStart = false; 11765d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel for (BasicBlock::iterator I = Start++; I != E; ++I) { 11775d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (I == Start) IAfterStart = true; 11785d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1179de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool IsSimpleLoadStore; 1180de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isInstVectorizable(I, IsSimpleLoadStore)) continue; 1181de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1182de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for an instruction with which to pair instruction *I... 
1183de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 1184de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 1185eaa8f5533f9f678fe3c56aec0201a34e46eaaf54Hal Finkel if (I->mayWriteToMemory()) WriteSet.add(I); 1186eaa8f5533f9f678fe3c56aec0201a34e46eaaf54Hal Finkel 11875d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel bool JAfterStart = IAfterStart; 11885d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel BasicBlock::iterator J = llvm::next(I); 1189bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) { 11905d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (J == Start) JAfterStart = true; 11915d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1192de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Determine if J uses I, if so, exit the loop. 1193bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep); 1194bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.FastDep) { 1195de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: For this heuristic to be effective, independent operations 1196de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // must tend to be intermixed. This is likely to be true from some 1197de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // kinds of grouped loop unrolling (but not the generic LLVM pass), 1198de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but otherwise may require some kind of reordering pass. 
1199de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1200de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // When using fast dependency analysis, 1201de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // stop searching after first use: 1202de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI) break; 1203de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 1204de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UsesI) continue; 1205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1206de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // J does not use I, and comes before the first use of I, so it can be 1208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // merged with I if the instructions are compatible. 1209a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel int CostSavings, FixedOrder; 121065309660fa61a837cc05323f69c618a7d8134d56Hal Finkel if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len, 1211a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel CostSavings, FixedOrder)) continue; 1212de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // J is a candidate for merging with I. 
1214de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!PairableInsts.size() || 1215de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInsts[PairableInsts.size()-1] != I) { 1216de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInsts.push_back(I); 1217de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 12185d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 12196ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel CandidatePairs[I].push_back(J); 1220ab90084bca42b74a5b5edad9b416bd81e105dad0Hal Finkel ++TotalPairs; 1221abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (TTI) 122265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), 122365309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings)); 12245d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1225a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel if (FixedOrder == 1) 1226a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrderPairs.insert(ValuePair(I, J)); 1227a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel else if (FixedOrder == -1) 1228a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel FixedOrderPairs.insert(ValuePair(J, I)); 1229a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel 12305d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // The next call to this function must start after the last instruction 12315d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // selected during this invocation. 
12325d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (JAfterStart) { 12335d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel Start = llvm::next(J); 12345d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel IAfterStart = JAfterStart = false; 12355d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } 12365d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 1237de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair " 123865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel << *I << " <-> " << *J << " (cost savings: " << 123965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel CostSavings << ")\n"); 12405d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 12415d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // If we have already found too many pairs, break here and this function 12425d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // will be called again starting after the last instruction selected 12435d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel // during this invocation. 
1244ab90084bca42b74a5b5edad9b416bd81e105dad0Hal Finkel if (PairableInsts.size() >= Config.MaxInsts || 1245ab90084bca42b74a5b5edad9b416bd81e105dad0Hal Finkel TotalPairs >= Config.MaxPairs) { 12465d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel ShouldContinue = true; 12475d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel break; 12485d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel } 1249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 12505d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 12515d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel if (ShouldContinue) 12525d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel break; 1253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1254de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1255de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: found " << PairableInsts.size() 1256de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << " instructions with candidate pairs\n"); 12575d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel 12585d4e18bc39fea892f523d960213906d296d3cb38Hal Finkel return ShouldContinue; 1259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1260de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1261de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Finds candidate pairs connected to the pair P = <PI, PJ>. This means that 1262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // it looks for pairs such that both members have an input which is an 1263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // output of PI or PJ. 
1264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::computePairsConnectedTo( 126597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 126697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 126797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 126897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 126997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 127097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ValuePair P) { 1271bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel StoreInst *SI, *SJ; 1272bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1273de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For each possible pairing for this variable, look at the uses of 1274de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the first value... 1275de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator I = P.first->use_begin(), 1276de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = P.first->use_end(); I != E; ++I) { 1277bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if (isa<LoadInst>(*I)) { 1278bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // A pair cannot be connected to a load because the load only takes one 1279bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // operand (the address) and it is a scalar even after vectorization. 
1280bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1281bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel } else if ((SI = dyn_cast<StoreInst>(*I)) && 1282bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.first == SI->getPointerOperand()) { 1283bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // Similarly, a pair cannot be connected to a store through its 1284bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel // pointer operand. 1285bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1286bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel } 1287bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1288de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For each use of the first variable, look for uses of the second 1289de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // variable... 1290de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.second->use_begin(), 1291de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = P.second->use_end(); J != E2; ++J) { 1292bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 1293bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SJ->getPointerOperand()) 1294bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1295bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1296de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for <I, J>: 129700f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel if (CandidatePairsSet.count(ValuePair(*I, *J))) { 129872465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*I, *J)); 129997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairs[VP.first].push_back(VP.second); 130072465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect)); 130172465ea23d010507d3746adc126d719005981e05Hal Finkel } 1302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1303de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look 
for <J, I>: 130400f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel if (CandidatePairsSet.count(ValuePair(*J, *I))) { 130572465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*J, *I)); 130697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairs[VP.first].push_back(VP.second); 130772465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap)); 130872465ea23d010507d3746adc126d719005981e05Hal Finkel } 1309de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1310de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1311bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.SplatBreaksChain) continue; 1312de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for cases where just the first value in the pair is used by 1313de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both members of another pair (splatting). 1314de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) { 1315bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 1316bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.first == SJ->getPointerOperand()) 1317bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1318bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 131900f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel if (CandidatePairsSet.count(ValuePair(*I, *J))) { 132072465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*I, *J)); 132197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairs[VP.first].push_back(VP.second); 132272465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); 132372465ea23d010507d3746adc126d719005981e05Hal Finkel } 1324de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1325de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1326de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 
1327bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng if (Config.SplatBreaksChain) return; 1328de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Look for cases where just the second value in the pair is used by 1329de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both members of another pair (splatting). 1330de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator I = P.second->use_begin(), 1331de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = P.second->use_end(); I != E; ++I) { 1332bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if (isa<LoadInst>(*I)) 1333bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1334bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel else if ((SI = dyn_cast<StoreInst>(*I)) && 1335bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SI->getPointerOperand()) 1336bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1337bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 1338de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) { 1339bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel if ((SJ = dyn_cast<StoreInst>(*J)) && 1340bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel P.second == SJ->getPointerOperand()) 1341bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel continue; 1342bba23ed672c4cedd61a302497f45bf6f53fec7b2Hal Finkel 134300f63b1b84d059a1ffa572e76708e03750a9e523Hal Finkel if (CandidatePairsSet.count(ValuePair(*I, *J))) { 134472465ea23d010507d3746adc126d719005981e05Hal Finkel VPPair VP(P, ValuePair(*I, *J)); 134597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairs[VP.first].push_back(VP.second); 134672465ea23d010507d3746adc126d719005981e05Hal Finkel PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); 134772465ea23d010507d3746adc126d719005981e05Hal Finkel } 1348de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1349de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 
1350de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1351de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1352de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function figures out which pairs are connected. Two pairs are 1353de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // connected if some output of the first pair forms an input to both members 1354de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // of the second pair. 1355de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::computeConnectedPairs( 135697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 135797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 135897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 135997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 136097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes) { 1361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator PI = PairableInsts.begin(), 1362de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PE = PairableInsts.end(); PI != PE; ++PI) { 13636ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel DenseMap<Value *, std::vector<Value *> >::iterator PP = 13646ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel CandidatePairs.find(*PI); 13656ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel if (PP == CandidatePairs.end()) 13666ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel continue; 1367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 13686ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel for (std::vector<Value *>::iterator P = PP->second.begin(), 13696ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel E = PP->second.end(); P != E; ++P) 1370b1a82589339fed148c12b052d30861a539552f1aHal Finkel computePairsConnectedTo(CandidatePairs, 
CandidatePairsSet, 1371b1a82589339fed148c12b052d30861a539552f1aHal Finkel PairableInsts, ConnectedPairs, 13726ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel PairConnectionTypes, ValuePair(*PI, *P)); 1373de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1374de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 137597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DEBUG(size_t TotalPairs = 0; 137697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I = 137797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairs.begin(), IE = ConnectedPairs.end(); I != IE; ++I) 137897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel TotalPairs += I->second.size(); 137997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel dbgs() << "BBV: found " << TotalPairs 1380de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << " pair connections.\n"); 1381de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1382de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1383de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function builds a set of use tuples such that <A, B> is in the set 1384f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // if B is in the use dag of A. If B is in the use dag of A, then B 1385de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // depends on the output of A. 
1386de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::buildDepMap( 1387de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BasicBlock &BB, 13886ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 1389de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 1390de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> &PairableInstUsers) { 1391de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> IsInPair; 13926ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel for (DenseMap<Value *, std::vector<Value *> >::iterator C = 13936ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel CandidatePairs.begin(), E = CandidatePairs.end(); C != E; ++C) { 1394de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel IsInPair.insert(C->first); 13956ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel IsInPair.insert(C->second.begin(), C->second.end()); 1396de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1397de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 13987a8aba906416b6998347b52c3c08610fdc190638Hal Finkel // Iterate through the basic block, recording all users of each 1399de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pairable instruction. 
1400de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1401c951003faf4d475d221f5e839971673d2350b983Hal Finkel BasicBlock::iterator E = BB.end(), EL = 1402c951003faf4d475d221f5e839971673d2350b983Hal Finkel BasicBlock::iterator(cast<Instruction>(PairableInsts.back())); 1403de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { 1404de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IsInPair.find(I) == IsInPair.end()) continue; 1405de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1406de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 1407de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 1408eaa8f5533f9f678fe3c56aec0201a34e46eaaf54Hal Finkel if (I->mayWriteToMemory()) WriteSet.add(I); 1409eaa8f5533f9f678fe3c56aec0201a34e46eaaf54Hal Finkel 1410c951003faf4d475d221f5e839971673d2350b983Hal Finkel for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) { 1411de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) trackUsesOfI(Users, WriteSet, I, J); 1412de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1413c951003faf4d475d221f5e839971673d2350b983Hal Finkel if (J == EL) 1414c951003faf4d475d221f5e839971673d2350b983Hal Finkel break; 1415c951003faf4d475d221f5e839971673d2350b983Hal Finkel } 1416c951003faf4d475d221f5e839971673d2350b983Hal Finkel 1417de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end(); 14188f3359a4b396d3f1a7b2726e02f199be74c62e4cHal Finkel U != E; ++U) { 14198f3359a4b396d3f1a7b2726e02f199be74c62e4cHal Finkel if (IsInPair.find(*U) == IsInPair.end()) continue; 1420de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.insert(ValuePair(I, *U)); 14218f3359a4b396d3f1a7b2726e02f199be74c62e4cHal Finkel } 1422c951003faf4d475d221f5e839971673d2350b983Hal Finkel 1423c951003faf4d475d221f5e839971673d2350b983Hal Finkel if (I == EL) 
1424c951003faf4d475d221f5e839971673d2350b983Hal Finkel break; 1425de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1426de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1427de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1428de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns true if an input to pair P is an output of pair Q and also an 1429de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // input of pair Q is an output of pair P. If this is the case, then these 1430de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // two pairs cannot be simultaneously fused. 1431de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q, 143297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &PairableInstUsers, 143397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > *PairableInstUserMap, 143497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<VPPair> *PairableInstUserPairSet) { 1435de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Two pairs are in conflict if they are mutual Users of eachother. 
1436de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) || 1437de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.first, Q.second)) || 1438de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.second, Q.first)) || 1439de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(P.second, Q.second)); 1440de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) || 1441de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.first, P.second)) || 1442de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.second, P.first)) || 1443de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PairableInstUsers.count(ValuePair(Q.second, P.second)); 1444de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (PairableInstUserMap) { 1445de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: The expensive part of the cycle check is not so much the cycle 1446de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // check itself but this edge insertion procedure. This needs some 144797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel // profiling and probably a different data structure. 
1448de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (PUsesQ) { 1449da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel if (PairableInstUserPairSet->insert(VPPair(Q, P)).second) 145097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel (*PairableInstUserMap)[Q].push_back(P); 1451de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1452de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (QUsesP) { 1453da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel if (PairableInstUserPairSet->insert(VPPair(P, Q)).second) 145497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel (*PairableInstUserMap)[P].push_back(Q); 1455de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1456de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1457de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1458de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (QUsesP && PUsesQ); 1459de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1460de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1461de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function walks the use graph of current pairs to see if, starting 1462de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // from P, the walk returns to P. 
1463de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::pairWillFormCycle(ValuePair P, 146497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, 146597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CurrentPairs) { 1466de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCycleCheck) 1467de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> " 1468de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *P.second << "\n"); 1469de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // A lookup table of visisted pairs is kept because the PairableInstUserMap 1470de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // contains non-direct associations. 1471de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> Visited; 147235564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePair, 32> Q; 1473de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1474de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(P); 147535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 147635564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel ValuePair QTop = Q.pop_back_val(); 1477de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Visited.insert(QTop); 1478de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1479de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugCycleCheck) 1480de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> " 1481de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << *QTop.second << "\n"); 148297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ = 148397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel PairableInstUserMap.find(QTop); 148497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (QQ == 
PairableInstUserMap.end()) 148597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel continue; 148697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel 148797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (std::vector<ValuePair>::iterator C = QQ->second.begin(), 148897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel CE = QQ->second.end(); C != CE; ++C) { 148997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (*C == P) { 1490de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() 1491de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel << "BBV: rejected to prevent non-trivial cycle formation: " 149297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel << QTop.first << " <-> " << C->second << "\n"); 1493de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return true; 1494de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1495de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 149697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (CurrentPairs.count(*C) && !Visited.count(*C)) 149797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel Q.push_back(*C); 1498de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 149935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1500de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1501de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return false; 1502de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1503de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1504f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // This function builds the initial dag of connected pairs with the 1505de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair J at the root. 
1506f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel void BBVectorize::buildInitialDAGFor( 150797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 150897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 150997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 151097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 151197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &PairableInstUsers, 151297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1513f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseMap<ValuePair, size_t> &DAG, ValuePair J) { 1514f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // Each of these pairs is viewed as the root node of a DAG. The DAG 1515de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is then walked (depth-first). As this happens, we keep track of 1516f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // the pairs that compose the DAG and the maximum depth of the DAG. 
151735564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePairWithDepth, 32> Q; 1518de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1519de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); 152035564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 1521de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ValuePairWithDepth QTop = Q.back(); 1522de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1523de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Push each child onto the queue: 1524de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool MoreChildren = false; 1525de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t MaxChildDepth = QTop.second; 152697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ = 152797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairs.find(QTop.first); 152897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (QQ != ConnectedPairs.end()) 152997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (std::vector<ValuePair>::iterator k = QQ->second.begin(), 153097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ke = QQ->second.end(); k != ke; ++k) { 153197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel // Make sure that this child pair is still a candidate: 153297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (CandidatePairsSet.count(*k)) { 1533f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseMap<ValuePair, size_t>::iterator C = DAG.find(*k); 1534f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel if (C == DAG.end()) { 153597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel size_t d = getDepthFactor(k->first); 153697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel Q.push_back(ValuePairWithDepth(*k, QTop.second+d)); 153797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel MoreChildren = true; 153897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel 
} else { 153997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel MaxChildDepth = std::max(MaxChildDepth, C->second); 154097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel } 1541de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1542de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1543de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1544de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!MoreChildren) { 1545f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // Record the current pair as part of the DAG: 1546f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DAG.insert(ValuePairWithDepth(QTop.first, MaxChildDepth)); 1547de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.pop_back(); 1548de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 154935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1550de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1551de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1552f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // Given some initial dag, prune it by removing conflicting pairs (pairs 1553de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that cannot be simultaneously chosen for vectorization). 
1554f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel void BBVectorize::pruneDAGFor( 155597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 155697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 155797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 155897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &PairableInstUsers, 155997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, 156097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<VPPair> &PairableInstUserPairSet, 156197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1562f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseMap<ValuePair, size_t> &DAG, 1563f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseSet<ValuePair> &PrunedDAG, ValuePair J, 156497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel bool UseCycleCheck) { 156535564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel SmallVector<ValuePairWithDepth, 32> Q; 1566de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // General depth-first post-order traversal: 1567de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); 156835564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel do { 156935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel ValuePairWithDepth QTop = Q.pop_back_val(); 1570f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel PrunedDAG.insert(QTop.first); 1571de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1572de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Visit each child, pruning as necessary... 
157397d19ebe5b5bf27617e536a16fa232116cefe914Hal Finkel SmallVector<ValuePairWithDepth, 8> BestChildren; 157497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ = 157597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairs.find(QTop.first); 157697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (QQ == ConnectedPairs.end()) 157797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel continue; 157897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel 157997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (std::vector<ValuePair>::iterator K = QQ->second.begin(), 158097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel KE = QQ->second.end(); K != KE; ++K) { 1581f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseMap<ValuePair, size_t>::iterator C = DAG.find(*K); 1582f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel if (C == DAG.end()) continue; 1583de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1584f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // This child is in the DAG, now we need to make sure it is the 1585de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // best of any conflicting children. There could be multiple 1586de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflicting children, so first, determine if we're keeping 1587de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // this child, then delete conflicting children as necessary. 1588de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1589de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // It is also necessary to guard against pairing-induced 1590de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // dependencies. Consider instructions a .. x .. y .. b 1591de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // such that (a,b) are to be fused and (x,y) are to be fused 1592de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // but a is an input to x and b is an output from y. 
This 1593de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // means that y cannot be moved after b but x must be moved 1594de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // after b for (a,b) to be fused. In other words, after 1595de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // fusing (a,b) we have y .. a/b .. x where y is an input 1596de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to a/b and x is an output to a/b: x and y can no longer 1597de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // be legally fused. To prevent this condition, we must 1598f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // make sure that a child pair added to the DAG is not 1599de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // both an input and output of an already-selected pair. 1600de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1601de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Pairing-induced dependencies can also form from more complicated 1602de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // cycles. The pair vs. pair conflicts are easy to check, and so 1603de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that is done explicitly for "fast rejection", and because for 1604de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // child vs. child conflicts, we may prefer to keep the current 1605de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair in preference to the already-selected child. 
1606de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> CurrentPairs; 1607de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1608de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool CanAdd = true; 16096227d5c690504c7ada5780c00a635b282c46e275Craig Topper for (SmallVectorImpl<ValuePairWithDepth>::iterator C2 161043ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(), E2 = BestChildren.end(); 1611de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2 != E2; ++C2) { 1612de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1613de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1614de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1615de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1616de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers, 1617da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? &PairableInstUserMap : 0, 1618da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? 
&PairableInstUserPairSet : 0)) { 1619de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->second >= C->second) { 1620de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1621de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1622de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1623de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1624de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(C2->first); 1625de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1626de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1627de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1628de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1629de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Even worse, this child could conflict with another node already 1630f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // selected for the DAG. If that is the case, ignore this child. 1631f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel for (DenseSet<ValuePair>::iterator T = PrunedDAG.begin(), 1632f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel E2 = PrunedDAG.end(); T != E2; ++T) { 1633de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (T->first == C->first.first || 1634de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->first == C->first.second || 1635de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->second == C->first.first || 1636de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel T->second == C->first.second || 1637de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(*T, C->first, PairableInstUsers, 1638da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? &PairableInstUserMap : 0, 1639da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? 
&PairableInstUserPairSet : 0)) { 1640de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1641de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1642de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1643de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1644de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(*T); 1645de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1646de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1647de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1648de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // And check the queue too... 16496227d5c690504c7ada5780c00a635b282c46e275Craig Topper for (SmallVectorImpl<ValuePairWithDepth>::iterator C2 = Q.begin(), 1650de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E2 = Q.end(); C2 != E2; ++C2) { 1651de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1652de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1653de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1654de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1655de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers, 1656da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? &PairableInstUserMap : 0, 1657da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? 
&PairableInstUserPairSet : 0)) { 1658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1659de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1660de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1661de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1662de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(C2->first); 1663de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1664de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1665de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1666de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Last but not least, check for a conflict with any of the 1667de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // already-chosen pairs. 1668de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator C2 = 1669de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.begin(), E2 = ChosenPairs.end(); 1670de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2 != E2; ++C2) { 1671de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (pairsConflict(*C2, C->first, PairableInstUsers, 1672da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? &PairableInstUserMap : 0, 1673da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? 
&PairableInstUserPairSet : 0)) { 1674de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CanAdd = false; 1675de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1676de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1677de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1678de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel CurrentPairs.insert(*C2); 1679de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1680de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!CanAdd) continue; 1681de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 16821230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // To check for non-trivial cycles formed by the addition of the 16831230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // current pair we've formed a list of all relevant pairs, now use a 16841230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // graph walk to check for a cycle. We start from the current pair and 1685f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // walk the use dag to see if we again reach the current pair. If we 16861230ad6e8cb7977527ac64dcf5005464d7d6c20bSebastian Pop // do, then the current pair is rejected. 1687de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1688de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // FIXME: It may be more efficient to use a topological-ordering 1689de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // algorithm to improve the cycle check. This should be investigated. 1690de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UseCycleCheck && 1691de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs)) 1692de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1693de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1694de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This child can be added, but we may have chosen it in preference 1695de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to an already-selected child. 
Check for this here, and if a 1696de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflict is found, then remove the previously-selected child 1697de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // before adding this one in its place. 16986227d5c690504c7ada5780c00a635b282c46e275Craig Topper for (SmallVectorImpl<ValuePairWithDepth>::iterator C2 169943ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(); C2 != BestChildren.end();) { 1700de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (C2->first.first == C->first.first || 1701de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.first == C->first.second || 1702de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.first || 1703de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C2->first.second == C->first.second || 1704de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel pairsConflict(C2->first, C->first, PairableInstUsers)) 1705d7a3425f06d51ed579bd9aefeb835b7fa4ce7849Hal Finkel C2 = BestChildren.erase(C2); 1706de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel else 1707de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++C2; 1708de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1709de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1710d7a3425f06d51ed579bd9aefeb835b7fa4ce7849Hal Finkel BestChildren.push_back(ValuePairWithDepth(C->first, C->second)); 1711de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1712de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 17136227d5c690504c7ada5780c00a635b282c46e275Craig Topper for (SmallVectorImpl<ValuePairWithDepth>::iterator C 171443ec0f4921e315dd9507be7467e633a837ad23dbSebastian Pop = BestChildren.begin(), E2 = BestChildren.end(); 1715de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel C != E2; ++C) { 1716de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel size_t DepthF = getDepthFactor(C->first.first); 1717de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Q.push_back(ValuePairWithDepth(C->first, 
QTop.second+DepthF)); 1718de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 171935564dc3ae1c377abad425cb09928eaf676dcb3cHal Finkel } while (!Q.empty()); 1720de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1721de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1722f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // This function finds the best dag of mututally-compatible connected 1723de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pairs, given the choice of root pairs as an iterator range. 1724f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel void BBVectorize::findBestDAGFor( 172597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 172697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 172797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 172897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 172997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &FixedOrderPairs, 173097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 173197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 173297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps, 173397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &PairableInstUsers, 173497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, 173597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<VPPair> &PairableInstUserPairSet, 173697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, Value *> &ChosenPairs, 1737f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth, 
173897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel int &BestEffSize, Value *II, std::vector<Value *>&JJ, 173997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel bool UseCycleCheck) { 17406ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel for (std::vector<Value *>::iterator J = JJ.begin(), JE = JJ.end(); 17416ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel J != JE; ++J) { 17426ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel ValuePair IJ(II, *J); 17436ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel if (!CandidatePairsSet.count(IJ)) 17446ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel continue; 1745de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1746de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Before going any further, make sure that this pair does not 1747de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // conflict with any already-selected pairs (see comment below 1748f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // near the DAG pruning for more details). 1749de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<ValuePair> ChosenPairSet; 1750de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool DoesConflict = false; 1751de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(), 1752de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = ChosenPairs.end(); C != E; ++C) { 17536ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel if (pairsConflict(*C, IJ, PairableInstUsers, 1754da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? &PairableInstUserMap : 0, 1755da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel UseCycleCheck ? 
&PairableInstUserPairSet : 0)) { 1756de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DoesConflict = true; 1757de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel break; 1758de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1759de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1760de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairSet.insert(*C); 1761de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 1762de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (DoesConflict) continue; 1763de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1764de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (UseCycleCheck && 17656ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet)) 1766de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 1767de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1768f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseMap<ValuePair, size_t> DAG; 1769f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel buildInitialDAGFor(CandidatePairs, CandidatePairsSet, 1770b1a82589339fed148c12b052d30861a539552f1aHal Finkel PairableInsts, ConnectedPairs, 1771f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel PairableInstUsers, ChosenPairs, DAG, IJ); 1772de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1773de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Because we'll keep the child with the largest depth, the largest 1774f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // depth is still the same in the unpruned DAG. 
1775f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel size_t MaxDepth = DAG.lookup(IJ); 1776de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1777f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {" 177876a05c93b13a8debec4497a6e4e753d7531709e5Hal Finkel << *IJ.first << " <-> " << *IJ.second << "} of depth " << 1779f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel MaxDepth << " and size " << DAG.size() << "\n"); 1780de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1781f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // At this point the DAG has been constructed, but, may contain 1782de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // contradictory children (meaning that different children of 1783f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // some dag node may be attempting to fuse the same instruction). 1784f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // So now we walk the dag again, in the case of a conflict, 1785de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // keep only the child with the largest depth. To break a tie, 1786de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // favor the first child. 
1787de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 1788f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseSet<ValuePair> PrunedDAG; 1789f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs, 1790b1a82589339fed148c12b052d30861a539552f1aHal Finkel PairableInstUsers, PairableInstUserMap, 1791b1a82589339fed148c12b052d30861a539552f1aHal Finkel PairableInstUserPairSet, 1792f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck); 1793de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 179465309660fa61a837cc05323f69c618a7d8134d56Hal Finkel int EffSize = 0; 1795abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (TTI) { 1796f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseSet<Value *> PrunedDAGInstrs; 1797f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(), 1798f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel E = PrunedDAG.end(); S != E; ++S) { 1799f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel PrunedDAGInstrs.insert(S->first); 1800f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel PrunedDAGInstrs.insert(S->second); 180178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 180278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 180378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // The set of pairs that have already contributed to the total cost. 180478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DenseSet<ValuePair> IncomingPairs; 180578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 18064387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel // If the cost model were perfect, this might not be necessary; but we 18074387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel // need to make sure that we don't get stuck vectorizing our own 18084387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel // shuffle chains. 
18094387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel bool HasNontrivialInsts = false; 18104387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel 181186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // The node weights represent the cost savings associated with 181286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // fusing the pair of instructions. 1813f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(), 1814f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel E = PrunedDAG.end(); S != E; ++S) { 18154387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel if (!isa<ShuffleVectorInst>(S->first) && 18164387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel !isa<InsertElementInst>(S->first) && 18174387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel !isa<ExtractElementInst>(S->first)) 18184387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel HasNontrivialInsts = true; 18194387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel 182078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel bool FlipOrder = false; 182178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 182278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (getDepthFactor(S->first)) { 182378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib = CandidatePairCostSavings.find(*S)->second; 182478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tweight {" 182578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel << *S->first << " <-> " << *S->second << "} = " << 182678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib << "\n"); 182778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize += ESContrib; 182878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 182986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel 183078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // The edge weights contribute in a negative sense: they represent 183178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // the cost of shuffles. 
183297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> >::iterator SS = 183397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairDeps.find(*S); 183497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (SS != ConnectedPairDeps.end()) { 183586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel unsigned NumDepsDirect = 0, NumDepsSwap = 0; 183697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (std::vector<ValuePair>::iterator T = SS->second.begin(), 183797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel TE = SS->second.end(); T != TE; ++T) { 183897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel VPPair Q(*S, *T); 1839f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel if (!PrunedDAG.count(Q.second)) 184078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 184186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned>::iterator R = 184297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel PairConnectionTypes.find(VPPair(Q.second, Q.first)); 184386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel assert(R != PairConnectionTypes.end() && 184486ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel "Cannot find pair connection type"); 184586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel if (R->second == PairConnectionDirect) 184686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ++NumDepsDirect; 184786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel else if (R->second == PairConnectionSwap) 184886ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ++NumDepsSwap; 184986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel } 185086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel 185186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // If there are more swaps than direct connections, then 185286ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // the pair order will be flipped during fusion. So the real 185386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel // number of swaps is the minimum number. 
185478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel FlipOrder = !FixedOrderPairs.count(*S) && 185586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ((NumDepsSwap > NumDepsDirect) || 185686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel FixedOrderPairs.count(ValuePair(S->second, S->first))); 185786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel 185897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (std::vector<ValuePair>::iterator T = SS->second.begin(), 185997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel TE = SS->second.end(); T != TE; ++T) { 186097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel VPPair Q(*S, *T); 1861f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel if (!PrunedDAG.count(Q.second)) 186278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 186386ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel DenseMap<VPPair, unsigned>::iterator R = 186497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel PairConnectionTypes.find(VPPair(Q.second, Q.first)); 186586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel assert(R != PairConnectionTypes.end() && 186686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel "Cannot find pair connection type"); 186797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel Type *Ty1 = Q.second.first->getType(), 186897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel *Ty2 = Q.second.second->getType(); 186986ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel Type *VTy = getVecTypeForPair(Ty1, Ty2); 187086ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel if ((R->second == PairConnectionDirect && FlipOrder) || 187186ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel (R->second == PairConnectionSwap && !FlipOrder) || 187278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel R->second == PairConnectionSplat) { 187378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 187478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, VTy); 1875245b657ab636a505066ea6a81591a9a8b93604d2Hal 
Finkel 1876245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel if (VTy->getVectorNumElements() == 2) { 1877245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel if (R->second == PairConnectionSplat) 1878245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( 1879245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel TargetTransformInfo::SK_Broadcast, VTy)); 1880245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel else 1881245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( 1882245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel TargetTransformInfo::SK_Reverse, VTy)); 1883245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel } 1884245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel 188578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << 188697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel *Q.second.first << " <-> " << *Q.second.second << 188778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel "} -> {" << 188878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S->first << " <-> " << *S->second << "} = " << 188978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib << "\n"); 189078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 189178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 189278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 189378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 189478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 189578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Compute the cost of outgoing edges. We assume that edges outgoing 189678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // to shuffles, inserts or extracts can be merged, and so contribute 189778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // no additional cost. 
189878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (!S->first->getType()->isVoidTy()) { 189978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *Ty1 = S->first->getType(), 190078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *Ty2 = S->second->getType(); 190178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *VTy = getVecTypeForPair(Ty1, Ty2); 190278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 190378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel bool NeedsExtraction = false; 190478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel for (Value::use_iterator I = S->first->use_begin(), 190578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel IE = S->first->use_end(); I != IE; ++I) { 190686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) { 190786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // Shuffle can be folded if it has no other input 190886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (isa<UndefValue>(SI->getOperand(1))) 190986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 191086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 191186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (isa<ExtractElementInst>(*I)) 191278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 1913f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel if (PrunedDAGInstrs.count(*I)) 191478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 191578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel NeedsExtraction = true; 191678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel break; 191778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 191878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 191978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (NeedsExtraction) { 192078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib; 1921245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel if (Ty1->isVectorTy()) { 192278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = 
(int) getInstrCost(Instruction::ShuffleVector, 192378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Ty1, VTy); 1924245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( 1925245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel TargetTransformInfo::SK_ExtractSubvector, VTy, 0, Ty1)); 1926245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel } else 1927abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib = (int) TTI->getVectorInstrCost( 192878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::ExtractElement, VTy, 0); 192978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 193078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << 193178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S->first << "} = " << ESContrib << "\n"); 193278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 193378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 193478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 193578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel NeedsExtraction = false; 193678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel for (Value::use_iterator I = S->second->use_begin(), 193778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel IE = S->second->use_end(); I != IE; ++I) { 193886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) { 193986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // Shuffle can be folded if it has no other input 194086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (isa<UndefValue>(SI->getOperand(1))) 194186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 194286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 194386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (isa<ExtractElementInst>(*I)) 194478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 1945f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel if 
(PrunedDAGInstrs.count(*I)) 194678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 194778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel NeedsExtraction = true; 194878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel break; 194978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 195078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 195178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (NeedsExtraction) { 195278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib; 1953245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel if (Ty2->isVectorTy()) { 195478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 195578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Ty2, VTy); 1956245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( 1957245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel TargetTransformInfo::SK_ExtractSubvector, VTy, 1958245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel Ty1->isVectorTy() ? Ty1->getVectorNumElements() : 1, Ty2)); 1959245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel } else 1960abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib = (int) TTI->getVectorInstrCost( 196178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::ExtractElement, VTy, 1); 196278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << 196378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S->second << "} = " << ESContrib << "\n"); 196478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 196578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 196678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 196778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 196878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Compute the cost of incoming edges. 
196978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) { 197078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction *S1 = cast<Instruction>(S->first), 197178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *S2 = cast<Instruction>(S->second); 197278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel for (unsigned o = 0; o < S1->getNumOperands(); ++o) { 197378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o); 197478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 197578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Combining constants into vector constants (or small vector 197678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // constants into larger ones are assumed free). 197778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (isa<Constant>(O1) && isa<Constant>(O2)) 197878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 197978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 198078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (FlipOrder) 198178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel std::swap(O1, O2); 198278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 198378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ValuePair VP = ValuePair(O1, O2); 198478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ValuePair VPR = ValuePair(O2, O1); 198578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 198678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Internal edges are not handled here. 
1987f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel if (PrunedDAG.count(VP) || PrunedDAG.count(VPR)) 198878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 198978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 199078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *Ty1 = O1->getType(), 199178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel *Ty2 = O2->getType(); 199278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *VTy = getVecTypeForPair(Ty1, Ty2); 199378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 199478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // Combining vector operations of the same type is also assumed 199578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // folded with other operations. 199686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (Ty1 == Ty2) { 199786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // If both are insert elements, then both can be widened. 1998b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1), 1999b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel *IEO2 = dyn_cast<InsertElementInst>(O2); 2000b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2)) 200186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 200286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // If both are extract elements, and both have the same input 200386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // type, then they can be replaced with a shuffle 200486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1), 200586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel *EIO2 = dyn_cast<ExtractElementInst>(O2); 200686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (EIO1 && EIO2 && 200786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel EIO1->getOperand(0)->getType() == 200886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel 
EIO2->getOperand(0)->getType()) 200986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 201086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // If both are a shuffle with equal operand types and only two 201186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // unqiue operands, then they can be replaced with a single 201286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel // shuffle 201386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1), 201486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel *SIO2 = dyn_cast<ShuffleVectorInst>(O2); 201586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (SIO1 && SIO2 && 201686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIO1->getOperand(0)->getType() == 201786c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIO2->getOperand(0)->getType()) { 201886c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SmallSet<Value *, 4> SIOps; 201986c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO1->getOperand(0)); 202086c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO1->getOperand(1)); 202186c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO2->getOperand(0)); 202286c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel SIOps.insert(SIO2->getOperand(1)); 202386c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel if (SIOps.size() <= 2) 202486c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel continue; 202586c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 202686c88c938aec8006d2ce83325ec1f31e1154620bHal Finkel } 202778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 202878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel int ESContrib; 202978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // This pair has already been formed. 
203078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (IncomingPairs.count(VP)) { 203178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel continue; 203278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (IncomingPairs.count(VPR)) { 203378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 203478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, VTy); 2035245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel 2036245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel if (VTy->getVectorNumElements() == 2) 2037245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( 2038245b657ab636a505066ea6a81591a9a8b93604d2Hal Finkel TargetTransformInfo::SK_Reverse, VTy)); 203978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) { 2040abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib = (int) TTI->getVectorInstrCost( 204178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, VTy, 0); 2042abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib += (int) TTI->getVectorInstrCost( 204378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, VTy, 1); 204478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (!Ty1->isVectorTy()) { 204578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // O1 needs to be inserted into a vector of size O2, and then 204678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // both need to be shuffled together. 
2047abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib = (int) TTI->getVectorInstrCost( 204878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, Ty2, 0); 204978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib += (int) getInstrCost(Instruction::ShuffleVector, 205078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, Ty2); 205178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else if (!Ty2->isVectorTy()) { 205278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // O2 needs to be inserted into a vector of size O1, and then 205378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel // both need to be shuffled together. 2054abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth ESContrib = (int) TTI->getVectorInstrCost( 205578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Instruction::InsertElement, Ty1, 0); 205678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib += (int) getInstrCost(Instruction::ShuffleVector, 205778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, Ty1); 205878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } else { 205978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel Type *TyBig = Ty1, *TySmall = Ty2; 206078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements()) 206178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel std::swap(TyBig, TySmall); 206278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 206378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib = (int) getInstrCost(Instruction::ShuffleVector, 206478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel VTy, TyBig); 206578fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel if (TyBig != TySmall) 206678fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib += (int) getInstrCost(Instruction::ShuffleVector, 206778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel TyBig, TySmall); 206878fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel } 
206978fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel 207078fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" 207178fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel << *O1 << " <-> " << *O2 << "} = " << 207278fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel ESContrib << "\n"); 207378fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel EffSize -= ESContrib; 207478fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel IncomingPairs.insert(VP); 207586ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel } 207686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel } 207765309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 20784387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel 20794387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel if (!HasNontrivialInsts) { 20804387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel DEBUG(if (DebugPairSelection) dbgs() << 2081f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel "\tNo non-trivial instructions in DAG;" 20824387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel " override to zero effective size\n"); 20834387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel EffSize = 0; 20844387b8c95971a512e07bfda30dea6459e8419e8fHal Finkel } 208565309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } else { 2086f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(), 2087f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel E = PrunedDAG.end(); S != E; ++S) 208865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel EffSize += (int) getDepthFactor(S->first); 208965309660fa61a837cc05323f69c618a7d8134d56Hal Finkel } 2090de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2091de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(if (DebugPairSelection) 2092f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel dbgs() << "BBV: found pruned DAG for pair {" 209376a05c93b13a8debec4497a6e4e753d7531709e5Hal Finkel << *IJ.first << " <-> " << *IJ.second << "} of depth " << 
2094f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel MaxDepth << " and size " << PrunedDAG.size() << 2095de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " (effective size: " << EffSize << ")\n"); 2096abc227d9b39baed266c453f459ca14242f5c1eeaChandler Carruth if (((TTI && !UseChainDepthWithTI) || 209778fd353d5e5daedc47ecc31b6193ca48793c249cHal Finkel MaxDepth >= Config.ReqChainDepth) && 209865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel EffSize > 0 && EffSize > BestEffSize) { 2099de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestMaxDepth = MaxDepth; 2100de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel BestEffSize = EffSize; 2101f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel BestDAG = PrunedDAG; 2102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Given the list of candidate pairs, this function selects those 2107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // that will be fused into vector instructions. 
2108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::choosePairs( 210997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &CandidatePairs, 211097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &CandidatePairsSet, 211197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, int> &CandidatePairCostSavings, 211297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 211397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &FixedOrderPairs, 211497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 211597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 211697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps, 211797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &PairableInstUsers, 211897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, Value *>& ChosenPairs) { 2119bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng bool UseCycleCheck = 21206ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel CandidatePairsSet.size() <= Config.MaxCandPairsForCycleCheck; 21216ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel 21226ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel DenseMap<Value *, std::vector<Value *> > CandidatePairs2; 21236ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel for (DenseSet<ValuePair>::iterator I = CandidatePairsSet.begin(), 21246ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel E = CandidatePairsSet.end(); I != E; ++I) { 21256ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel std::vector<Value *> &JJ = CandidatePairs2[I->second]; 21266ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel if (JJ.empty()) JJ.reserve(32); 21276ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel JJ.push_back(I->first); 
21286ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel } 21296ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel 213097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > PairableInstUserMap; 2131da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel DenseSet<VPPair> PairableInstUserPairSet; 2132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator I = PairableInsts.begin(), 2133de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = PairableInsts.end(); I != E; ++I) { 2134de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The number of possible pairings for this variable: 21356ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel size_t NumChoices = CandidatePairs.lookup(*I).size(); 2136de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!NumChoices) continue; 2137de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 21386ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel std::vector<Value *> &JJ = CandidatePairs[*I]; 2139de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2140f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // The best pair to choose and its dag: 214165309660fa61a837cc05323f69c618a7d8134d56Hal Finkel size_t BestMaxDepth = 0; 214265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel int BestEffSize = 0; 2143f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DenseSet<ValuePair> BestDAG; 2144f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel findBestDAGFor(CandidatePairs, CandidatePairsSet, 2145b1a82589339fed148c12b052d30861a539552f1aHal Finkel CandidatePairCostSavings, 214686ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel PairableInsts, FixedOrderPairs, PairConnectionTypes, 214786ccc55c82651f91fd6a312c5f6a4b511bcd1aecHal Finkel ConnectedPairs, ConnectedPairDeps, 2148da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel PairableInstUsers, PairableInstUserMap, 2149da20ea696d8b24d89ae157106ddad2337296ed50Hal Finkel PairableInstUserPairSet, ChosenPairs, 
2150f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel BestDAG, BestMaxDepth, BestEffSize, *I, JJ, 2151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel UseCycleCheck); 2152de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2153f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel if (BestDAG.empty()) 21546ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel continue; 21556ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel 2156f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // A dag has been chosen (or not) at this point. If no dag was 2157de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // chosen, then this instruction, I, cannot be paired (and is no longer 2158de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // considered). 2159de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2160f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel DEBUG(dbgs() << "BBV: selected pairs in the best DAG for: " 21616ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel << *cast<Instruction>(*I) << "\n"); 2162de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2163f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel for (DenseSet<ValuePair>::iterator S = BestDAG.begin(), 2164f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel SE2 = BestDAG.end(); S != SE2; ++S) { 2165f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // Insert the members of this dag into the list of chosen pairs. 2166de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.insert(ValuePair(S->first, S->second)); 2167de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " << 2168de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel *S->second << "\n"); 2169de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2170f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // Remove all candidate pairs that have values in the chosen dag. 
2171f79f136cc64b0625b77c7b9008ed8c5b848b6b17Hal Finkel std::vector<Value *> &KK = CandidatePairs[S->first]; 21726ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel for (std::vector<Value *>::iterator K = KK.begin(), KE = KK.end(); 21736ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel K != KE; ++K) { 21746ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel if (*K == S->second) 21756ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel continue; 21766ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel 21776ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel CandidatePairsSet.erase(ValuePair(S->first, *K)); 21786ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel } 2179f79f136cc64b0625b77c7b9008ed8c5b848b6b17Hal Finkel 2180f79f136cc64b0625b77c7b9008ed8c5b848b6b17Hal Finkel std::vector<Value *> &LL = CandidatePairs2[S->second]; 21816ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel for (std::vector<Value *>::iterator L = LL.begin(), LE = LL.end(); 21826ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel L != LE; ++L) { 21836ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel if (*L == S->first) 21846ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel continue; 21856ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel 21866ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel CandidatePairsSet.erase(ValuePair(*L, S->second)); 21876ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel } 2188f79f136cc64b0625b77c7b9008ed8c5b848b6b17Hal Finkel 2189f79f136cc64b0625b77c7b9008ed8c5b848b6b17Hal Finkel std::vector<Value *> &MM = CandidatePairs[S->second]; 21906ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel for (std::vector<Value *>::iterator M = MM.begin(), ME = MM.end(); 21916ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel M != ME; ++M) { 21926ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel assert(*M != S->first && "Flipped pair in candidate list?"); 21936ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel CandidatePairsSet.erase(ValuePair(S->second, *M)); 
21946ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel } 2195f79f136cc64b0625b77c7b9008ed8c5b848b6b17Hal Finkel 2196f79f136cc64b0625b77c7b9008ed8c5b848b6b17Hal Finkel std::vector<Value *> &NN = CandidatePairs2[S->first]; 21976ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel for (std::vector<Value *>::iterator N = NN.begin(), NE = NN.end(); 21986ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel N != NE; ++N) { 21996ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel assert(*N != S->second && "Flipped pair in candidate list?"); 22006ca6d3b1eac5b8611f3a9e2c270c2e794d37e1f5Hal Finkel CandidatePairsSet.erase(ValuePair(*N, S->first)); 2201de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2202de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2203de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2204de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2205de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n"); 2206de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2207de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2208de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::string getReplacementName(Instruction *I, bool IsInput, unsigned o, 2209de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned n = 0) { 2210de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!I->hasName()) 2211de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return ""; 2212de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2213de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) + 2214de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (n > 0 ? "." 
+ utostr(n) : "")).str(); 2215de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2216de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2217de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value that is to be used as the pointer input to the vector 2218de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instruction that fuses I with J. 2219de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, 2220202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *I, Instruction *J, unsigned o) { 2221de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *IPtr, *JPtr; 222265309660fa61a837cc05323f69c618a7d8134d56Hal Finkel unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; 2223de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int64_t OffsetInElmts; 2224282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 2225202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel // Note: the analysis might fail here, that is why the pair order has 2226282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel // been precomputed (OffsetInElmts must be unused here). 2227de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, 222865309660fa61a837cc05323f69c618a7d8134d56Hal Finkel IAddressSpace, JAddressSpace, 222993f6f457614299eee3d22f376ab8f42a130f1912Hal Finkel OffsetInElmts, false); 2230de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2231de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // The pointer value is taken to be the one with the lowest offset. 
2232202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Value *VPtr = IPtr; 2233de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 223464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType(); 223564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType(); 223664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 2237de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *VArgPtrType = PointerType::get(VArgType, 2238de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel cast<PointerType>(IPtr->getType())->getAddressSpace()); 2239de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o), 2240202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel /* insert before */ I); 2241de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2242de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2243de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J, 224464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned MaskOffset, unsigned NumInElem, 224564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElem1, unsigned IdxOffset, 224664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> &Mask) { 224764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements(); 224864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < NumElem1; ++v) { 2249de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel int m = cast<ShuffleVectorInst>(J)->getMaskValue(v); 2250de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (m < 0) { 2251de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context)); 
2252de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2253de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned mm = m + (int) IdxOffset; 225464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (m >= (int) NumInElem1) 2255de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel mm += (int) NumInElem; 2256de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2257de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Mask[v+MaskOffset] = 2258de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ConstantInt::get(Type::getInt32Ty(Context), mm); 2259de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2260de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2261de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2262de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2263de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value that is to be used as the vector-shuffle mask to the 2264de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // vector instruction that fuses I with J. 2265de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context, 2266de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 2267de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This is the shuffle mask. We need to append the second 2268de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // mask to the first, and the numbers need to be adjusted. 
2269de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 227064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getType(); 227164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getType(); 227264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 227364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 227464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements(); 2275de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2276de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Get the total number of elements in the fused vector type. 2277de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // By definition, this must equal the number of elements in 2278de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the final mask. 2279de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumElem = cast<VectorType>(VArgType)->getNumElements(); 2280de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Constant*> Mask(NumElem); 2281de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 228264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *OpTypeI = I->getOperand(0)->getType(); 228364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements(); 228464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *OpTypeJ = J->getOperand(0)->getType(); 228564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements(); 228664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 228764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // The fused vector will be: 228864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // ----------------------------------------------------- 228964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ | 229064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal 
Finkel // ----------------------------------------------------- 229164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // from which we'll extract NumElem total elements (where the first NumElemI 229264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // of them come from the mask in I and the remainder come from the mask 229364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // in J. 2294de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2295de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For the mask from the first pair... 229664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI, 229764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 0, Mask); 2298de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2299de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // For the mask from the second pair... 230064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ, 230164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NumInElemI, Mask); 2302de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2303de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return ConstantVector::get(Mask); 2304de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2305de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 230664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I, 230764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *J, unsigned o, Value *&LOp, 230864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemL, 230964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeL, Type *ArgTypeH, 231072465ea23d010507d3746adc126d719005981e05Hal Finkel bool IBeforeJ, unsigned IdxOff) { 231164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool ExpandedIEChain = false; 231264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) { 
231364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // If we have a pure insertelement chain, then this can be rewritten 231464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // into a chain that directly builds the larger type. 2315b2b2469a9178f7e22cd7a69f3093e54d67d6b712Hal Finkel if (isPureIEChain(LIE)) { 231664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel SmallVector<Value *, 8> VectElemts(numElemL, 231764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel UndefValue::get(ArgTypeL->getScalarType())); 231864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel InsertElementInst *LIENext = LIE; 231964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel do { 232064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned Idx = 232164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(LIENext->getOperand(2))->getSExtValue(); 232264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectElemts[Idx] = LIENext->getOperand(1); 232364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } while ((LIENext = 232464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel dyn_cast<InsertElementInst>(LIENext->getOperand(0)))); 232564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 232664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIENext = 0; 232764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *LIEPrev = UndefValue::get(ArgTypeH); 232864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemL; ++i) { 232964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (isa<UndefValue>(VectElemts[i])) continue; 233064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i], 233164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantInt::get(Type::getInt32Ty(Context), 233264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel i + IdxOff), 233372465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? 
I : J, 233472465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, i+1)); 233572465ea23d010507d3746adc126d719005981e05Hal Finkel LIENext->insertBefore(IBeforeJ ? J : I); 233664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LIEPrev = LIENext; 233764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 233864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 233964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH); 234064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExpandedIEChain = true; 234164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 234264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 234364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 234464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return ExpandedIEChain; 234564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 234664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 2347de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Returns the value to be used as the specified operand of the vector 2348de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // instruction that fuses I with J. 
2349de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I, 235072465ea23d010507d3746adc126d719005981e05Hal Finkel Instruction *J, unsigned o, bool IBeforeJ) { 2351de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); 2352de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); 2353de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 235464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // Compute the fused vector type for this operand 235564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getOperand(o)->getType(); 235664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getOperand(o)->getType(); 235764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 2358de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2359de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *L = I, *H = J; 236064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ; 2361de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 236264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemL; 236364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgTypeL->isVectorTy()) 236464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL = cast<VectorType>(ArgTypeL)->getNumElements(); 236564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 236664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL = 1; 2367de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 236864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemH; 236964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgTypeH->isVectorTy()) 237064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemH = cast<VectorType>(ArgTypeH)->getNumElements(); 
237164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 237264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemH = 1; 237364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 237464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *LOp = L->getOperand(o); 237564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *HOp = H->getOperand(o); 237664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = VArgType->getNumElements(); 237764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 237864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // First, we check if we can reuse the "original" vector outputs (if these 237964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // exist). We might need a shuffle. 238064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExtractElementInst *LEE = dyn_cast<ExtractElementInst>(LOp); 238164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ExtractElementInst *HEE = dyn_cast<ExtractElementInst>(HOp); 238264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ShuffleVectorInst *LSV = dyn_cast<ShuffleVectorInst>(LOp); 238364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ShuffleVectorInst *HSV = dyn_cast<ShuffleVectorInst>(HOp); 238464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 238564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // FIXME: If we're fusing shuffle instructions, then we can't apply this 238664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // optimization. The input vectors to the shuffle might be a different 238764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // length from the shuffle outputs. Unfortunately, the replacement 238864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // shuffle mask has already been formed, and the mask entries are sensitive 238964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // to the sizes of the inputs. 
239064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool IsSizeChangeShuffle = 239164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel isa<ShuffleVectorInst>(L) && 239264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel (LOp->getType() != L->getType() || HOp->getType() != H->getType()); 239364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 239464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) { 239564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We can have at most two unique vector inputs. 239664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool CanUseInputs = true; 239764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I1, *I2 = 0; 239864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (LEE) { 239964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = LEE->getOperand(0); 240064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 240164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = LSV->getOperand(0); 240264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = LSV->getOperand(1); 240364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I2 == I1 || isa<UndefValue>(I2)) 240464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = 0; 240564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 240664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 240764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (HEE) { 240864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I3 = HEE->getOperand(0); 240964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I3 != I1) 241064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I3; 241164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I3 != I1 && I3 != I2) 241264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 241364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 241464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I3 = HSV->getOperand(0); 
241564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I3 != I1) 241664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I3; 241764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I3 != I1 && I3 != I2) 241864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 241964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 242064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (CanUseInputs) { 242164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *I4 = HSV->getOperand(1); 242264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!isa<UndefValue>(I4)) { 242364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2 && I4 != I1) 242464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = I4; 242564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else if (I4 != I1 && I4 != I2) 242664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel CanUseInputs = false; 242764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 242864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 242964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 243064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 243164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (CanUseInputs) { 243264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned LOpElem = 243364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType()) 243464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ->getNumElements(); 243564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned HOpElem = 243664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType()) 243764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ->getNumElements(); 243864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 243964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We have one or two input vectors. 
We need to map each index of the 244064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // operands to the index of the original vector. 244164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel SmallVector<std::pair<int, int>, 8> II(numElem); 244264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemL; ++i) { 244364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx, INum; 244464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (LEE) { 244564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = 244664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(LEE->getOperand(1))->getSExtValue(); 244764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LEE->getOperand(0) == I1 ? 0 : 1; 244864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 244964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = LSV->getMaskValue(i); 245064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx < (int) LOpElem) { 245164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LSV->getOperand(0) == I1 ? 0 : 1; 245264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 245364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx -= LOpElem; 245464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = LSV->getOperand(1) == I1 ? 
0 : 1; 245564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 245664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 245764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 245864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel II[i] = std::pair<int, int>(Idx, INum); 245964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 246064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElemH; ++i) { 246164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx, INum; 246264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (HEE) { 246364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = 246464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel cast<ConstantInt>(HEE->getOperand(1))->getSExtValue(); 246564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HEE->getOperand(0) == I1 ? 0 : 1; 246664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 246764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx = HSV->getMaskValue(i); 246864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx < (int) HOpElem) { 246964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HSV->getOperand(0) == I1 ? 0 : 1; 247064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 247164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx -= HOpElem; 247264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel INum = HSV->getOperand(1) == I1 ? 0 : 1; 247364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 247464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 247564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 247664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel II[i + numElemL] = std::pair<int, int>(Idx, INum); 247764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 247864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 247964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // We now have an array which tells us from which index of which 248064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // input vector each element of the operand comes. 
248164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *I1T = cast<VectorType>(I1->getType()); 248264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned I1Elem = I1T->getNumElements(); 248364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 248464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (!I2) { 248564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // In this case there is only one underlying vector input. Check for 248664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // the trivial case where we can use the input directly. 248764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I1Elem == numElem) { 248864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel bool ElemInOrder = true; 248964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElem; ++i) { 249064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (II[i].first != (int) i && II[i].first != -1) { 249164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ElemInOrder = false; 249264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel break; 249364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 249464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 249564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 249664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ElemInOrder) 249764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return I1; 249864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 249964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 250064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // A shuffle is needed. 
250164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElem); 250264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned i = 0; i < numElem; ++i) { 250364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx = II[i].first; 250464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (Idx == -1) 250564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[i] = UndefValue::get(Type::getInt32Ty(Context)); 250664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 250764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 250864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 250964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 251064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = 251164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, UndefValue::get(I1T), 251264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 251372465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 251472465ea23d010507d3746adc126d719005981e05Hal Finkel true, o)); 251572465ea23d010507d3746adc126d719005981e05Hal Finkel S->insertBefore(IBeforeJ ? J : I); 251664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 251764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 251864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 251964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *I2T = cast<VectorType>(I2->getType()); 252064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned I2Elem = I2T->getNumElements(); 252164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 252264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // This input comes from two distinct vectors. The first step is to 252364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // make sure that both vectors are the same length. 
If not, the 252464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // smaller one will need to grow before they can be shuffled together. 252564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (I1Elem < I2Elem) { 252664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(I2Elem); 252764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 252864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I1Elem; ++v) 252964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 253064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I2Elem; ++v) 253164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 253264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 253364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewI1 = 253464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, UndefValue::get(I1T), 253564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 253672465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 253772465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 253872465ea23d010507d3746adc126d719005981e05Hal Finkel NewI1->insertBefore(IBeforeJ ? 
J : I); 253964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1 = NewI1; 254064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1T = I2T; 254164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I1Elem = I2Elem; 254264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (I1Elem > I2Elem) { 254364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(I1Elem); 254464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 254564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I2Elem; ++v) 254664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 254764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < I1Elem; ++v) 254864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 254964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 255064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewI2 = 255164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I2, UndefValue::get(I2T), 255264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 255372465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 255472465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 255572465ea23d010507d3746adc126d719005981e05Hal Finkel NewI2->insertBefore(IBeforeJ ? J : I); 255664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2 = NewI2; 255764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2T = I1T; 255864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel I2Elem = I1Elem; 255964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 256064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 256164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // Now that both I1 and I2 are the same length we can shuffle them 256264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // together (and use the result). 
256364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElem); 256464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElem; ++v) { 256564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (II[v].first == -1) { 256664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 256764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 256864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel int Idx = II[v].first + II[v].second * I1Elem; 256964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 257064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 257164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 257264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 257364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NewOp = 257464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask), 257572465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, true, o)); 257672465ea23d010507d3746adc126d719005981e05Hal Finkel NewOp->insertBefore(IBeforeJ ? 
J : I); 257764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return NewOp; 257864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2579de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2580de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 258164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgType = ArgTypeL; 258264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL < numElemH) { 258364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH, 258472465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeL, VArgType, IBeforeJ, 1)) { 258564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // This is another short-circuit case: we're combining a scalar into 258664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // a vector that is formed by an IE chain. We've just expanded the IE 258764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // chain, now insert the scalar and we're done. 258864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 258964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = InsertElementInst::Create(HOp, LOp, CV0, 259072465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, true, o)); 259172465ea23d010507d3746adc126d719005981e05Hal Finkel S->insertBefore(IBeforeJ ? J : I); 259264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 259364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL, 259472465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeH, IBeforeJ)) { 259564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // The two vector inputs to the shuffle must be the same length, 259664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // so extend the smaller vector to be the same length as the larger one. 
259764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NLOp; 259864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemL > 1) { 259964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 260064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElemH); 260164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 260264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemL; ++v) 260364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 260464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemH; ++v) 260564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 260664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 260764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL), 260864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 260972465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 261072465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 261164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 261264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0, 261372465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 261472465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 261564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 261664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 261772465ea23d010507d3746adc126d719005981e05Hal Finkel NLOp->insertBefore(IBeforeJ ? 
J : I); 261864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel LOp = NLOp; 261964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 262064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 262164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ArgType = ArgTypeH; 262264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (numElemL > numElemH) { 262364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL, 262472465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeH, VArgType, IBeforeJ)) { 262564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *S = 262664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel InsertElementInst::Create(LOp, HOp, 262764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantInt::get(Type::getInt32Ty(Context), 262864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemL), 262972465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 263072465ea23d010507d3746adc126d719005981e05Hal Finkel true, o)); 263172465ea23d010507d3746adc126d719005981e05Hal Finkel S->insertBefore(IBeforeJ ? 
J : I); 263264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel return S; 263364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH, 263472465ea23d010507d3746adc126d719005981e05Hal Finkel ArgTypeL, IBeforeJ)) { 263564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *NHOp; 263664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (numElemH > 1) { 263764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant *> Mask(numElemL); 263864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned v = 0; 263964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemH; ++v) 264064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 264164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (; v < numElemL; ++v) 264264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); 264364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 264464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH), 264564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel ConstantVector::get(Mask), 264672465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 264772465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 264864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 264964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0, 265072465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 265172465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 265264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 265364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 265472465ea23d010507d3746adc126d719005981e05Hal Finkel NHOp->insertBefore(IBeforeJ ? 
J : I); 265564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel HOp = NHOp; 2656de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 265764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2658de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 265964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (ArgType->isVectorTy()) { 266064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = cast<VectorType>(VArgType)->getNumElements(); 266164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask(numElem); 266264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElem; ++v) { 266364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned Idx = v; 266464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // If the low vector was expanded, we need to skip the extra 266564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel // undefined entries. 266664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (v >= numElemL && numElemH > numElemL) 266764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Idx += (numElemH - numElemL); 266864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); 266964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2670de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 267164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *BV = new ShuffleVectorInst(LOp, HOp, 267272465ea23d010507d3746adc126d719005981e05Hal Finkel ConstantVector::get(Mask), 267372465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, true, o)); 267472465ea23d010507d3746adc126d719005981e05Hal Finkel BV->insertBefore(IBeforeJ ? 
J : I); 2675de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV; 2676de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2677de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2678de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *BV1 = InsertElementInst::Create( 267964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel UndefValue::get(VArgType), LOp, CV0, 268072465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 268172465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 1)); 268272465ea23d010507d3746adc126d719005981e05Hal Finkel BV1->insertBefore(IBeforeJ ? J : I); 268364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1, 268472465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementName(IBeforeJ ? I : J, 268572465ea23d010507d3746adc126d719005981e05Hal Finkel true, o, 2)); 268672465ea23d010507d3746adc126d719005981e05Hal Finkel BV2->insertBefore(IBeforeJ ? J : I); 2687de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel return BV2; 2688de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2689de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2690de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function creates an array of values that will be used as the inputs 2691de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to the vector instruction that fuses I with J. 
2692de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, 2693de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J, 2694a0ec3f9b7b826b9b40b80199923b664bad808cceCraig Topper SmallVectorImpl<Value *> &ReplacedOperands, 269572465ea23d010507d3746adc126d719005981e05Hal Finkel bool IBeforeJ) { 2696de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumOperands = I->getNumOperands(); 2697de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2698de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { 2699de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Iterate backward so that we look at the store pointer 2700de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // first and know whether or not we need to flip the inputs. 2701de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2702de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) { 2703de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This is the pointer for a load/store instruction. 
2704202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o); 2705de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 27066173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } else if (isa<CallInst>(I)) { 2707de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Function *F = cast<CallInst>(I)->getCalledFunction(); 2708a77728415857196035c0090f7b2749d7971811a2Hal Finkel Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID(); 27096173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel if (o == NumOperands-1) { 27106173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel BasicBlock &BB = *I->getParent(); 2711bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 27126173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel Module *M = BB.getParent()->getParent(); 271364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeI = I->getType(); 271464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *ArgTypeJ = J->getType(); 271564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); 2716bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 2717a77728415857196035c0090f7b2749d7971811a2Hal Finkel ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType); 27186173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel continue; 27196173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } else if (IID == Intrinsic::powi && o == 1) { 27206173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // The second argument of powi is a single integer and we've already 27216173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // checked that both arguments are equal. As a result, we just keep 27226173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel // I's second argument. 
27236173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel ReplacedOperands[o] = I->getOperand(o); 27246173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel continue; 27256173ed95daf2f209fe3883faee45967e4800ae75Hal Finkel } 2726de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) { 2727de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J); 2728de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2729de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2730de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 273172465ea23d010507d3746adc126d719005981e05Hal Finkel ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ); 2732de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2733de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2734de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2735de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function creates two values that represent the outputs of the 2736de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // original I and J instructions. These are generally vector shuffles 2737de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // or extracts. In many cases, these will end up being unused and, thus, 2738de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // eliminated by later passes. 
2739de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I, 2740de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *J, Instruction *K, 2741de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 2742202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *&K1, Instruction *&K2) { 2743de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (isa<StoreInst>(I)) { 2744de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(I, K); 2745de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AA->replaceWithNewValue(J, K); 2746de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2747de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Type *IType = I->getType(); 274864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Type *JType = J->getType(); 274964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 275064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel VectorType *VType = getVecTypeForPair(IType, JType); 275164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElem = VType->getNumElements(); 275264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 275364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel unsigned numElemI, numElemJ; 275464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (IType->isVectorTy()) 275564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemI = cast<VectorType>(IType)->getNumElements(); 275664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 275764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemI = 1; 275864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 275964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (JType->isVectorTy()) 276064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemJ = cast<VectorType>(JType)->getNumElements(); 276164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel else 276264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel numElemJ = 1; 
2763de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2764de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (IType->isVectorTy()) { 276564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask1(numElemI), Mask2(numElemI); 276664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElemI; ++v) { 276764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 276864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v); 276964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 2770de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 277164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel K1 = new ShuffleVectorInst(K, UndefValue::get(VType), 2772202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel ConstantVector::get( Mask1), 277364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(K, false, 1)); 2774de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 277564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); 2776202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel K1 = ExtractElementInst::Create(K, CV0, 2777de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 1)); 277864e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 277964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 278064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel if (JType->isVectorTy()) { 278164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ); 278264e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel for (unsigned v = 0; v < numElemJ; ++v) { 278364e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); 278464e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v); 
278564e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } 278664e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel 278764e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel K2 = new ShuffleVectorInst(K, UndefValue::get(VType), 2788202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel ConstantVector::get( Mask2), 278964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel getReplacementName(K, false, 2)); 279064e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel } else { 279164e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); 2792202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel K2 = ExtractElementInst::Create(K, CV1, 2793de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel getReplacementName(K, false, 2)); 2794de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2795de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2796de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K1->insertAfter(K); 2797de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K2->insertAfter(K1); 2798de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InsertionPt = K2; 2799de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2800de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2801de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2802de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move all uses of the function I (including pairing-induced uses) after J. 2803de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB, 28042f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 2805de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 2806de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 
2807ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2808de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2809de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2810de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2811eaa8f5533f9f678fe3c56aec0201a34e46eaaf54Hal Finkel if (I->mayWriteToMemory()) WriteSet.add(I); 2812eaa8f5533f9f678fe3c56aec0201a34e46eaaf54Hal Finkel 2813de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (; cast<Instruction>(L) != J; ++L) 28142f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs); 2815de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2816de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel assert(cast<Instruction>(L) == J && 2817de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel "Tracking has not proceeded far enough to check for dependencies"); 2818de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // If J is now in the use set of I, then trackUsesOfI will return true 2819de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // and we have a dependency cycle (and the fusing operation must abort). 28202f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSetPairs); 2821de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2822de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2823de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move all uses of the function I (including pairing-induced uses) after J. 
2824de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB, 28252f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 2826de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *&InsertionPt, 2827de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I, Instruction *J) { 2828de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 2829ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2830de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2831de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2832de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2833eaa8f5533f9f678fe3c56aec0201a34e46eaaf54Hal Finkel if (I->mayWriteToMemory()) WriteSet.add(I); 2834eaa8f5533f9f678fe3c56aec0201a34e46eaaf54Hal Finkel 2835de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (; cast<Instruction>(L) != J;) { 28362f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) { 2837de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Move this instruction 2838de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *InstToMove = L; ++L; 2839de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2840de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: moving: " << *InstToMove << 2841de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " to after " << *InsertionPt << "\n"); 2842de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InstToMove->removeFromParent(); 2843de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InstToMove->insertAfter(InsertionPt); 2844de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel InsertionPt = InstToMove; 2845de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } else { 2846de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++L; 
2847de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2848de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2849de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2850de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2851de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Collect all load instruction that are in the move set of a given first 2852de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair member. These loads depend on the first instruction, I, and so need 2853de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // to be moved after J (the second instruction) when the pair is fused. 2854de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB, 2855de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 285697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &LoadMoveSet, 28572f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs, 2858de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I) { 2859de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Skip to the first instruction past I. 2860ded681d2725907c7de9db53d59cee0c51fad6fcbBenjamin Kramer BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); 2861de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2862de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseSet<Value *> Users; 2863de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel AliasSetTracker WriteSet(*AA); 2864eaa8f5533f9f678fe3c56aec0201a34e46eaaf54Hal Finkel if (I->mayWriteToMemory()) WriteSet.add(I); 2865de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2866de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Note: We cannot end the loop when we reach J because J could be moved 2867de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // farther down the use chain by another instruction pairing. 
Also, J 2868de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // could be before I if this is an inverted input. 2869de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) { 2870de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (trackUsesOfI(Users, WriteSet, I, L)) { 28712f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel if (L->mayReadFromMemory()) { 287297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel LoadMoveSet[L].push_back(I); 28732f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel LoadMoveSetPairs.insert(ValuePair(L, I)); 28742f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel } 2875de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2876de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2877de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2878de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2879de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // In cases where both load/stores and the computation of their pointers 2880de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // are chosen for vectorization, we can end up in a situation where the 2881de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // aliasing analysis starts returning different query results as the 2882de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // process of fusing instruction pairs continues. Because the algorithm 2883f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // relies on finding the same use dags here as were found earlier, we'll 2884de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // need to precompute the necessary aliasing information here and then 2885de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // manually update it during the fusion process. 
2886de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::collectLoadMoveSet(BasicBlock &BB, 2887de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<Value *> &PairableInsts, 2888de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *> &ChosenPairs, 288997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > &LoadMoveSet, 28902f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> &LoadMoveSetPairs) { 2891de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<Value *>::iterator PI = PairableInsts.begin(), 2892de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PIE = PairableInsts.end(); PI != PIE; ++PI) { 2893de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI); 2894de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (P == ChosenPairs.end()) continue; 2895de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2896de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I = cast<Instruction>(P->first); 28972f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, 28982f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel LoadMoveSetPairs, I); 2899de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2900de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2901de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2902ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // When the first instruction in each pair is cloned, it will inherit its 2903ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // parent's metadata. This metadata must be combined with that of the other 2904ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel // instruction in a safe way. 
2905ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel void BBVectorize::combineMetadata(Instruction *K, const Instruction *J) { 2906ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata; 2907ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->getAllMetadataOtherThanDebugLoc(Metadata); 2908ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel for (unsigned i = 0, n = Metadata.size(); i < n; ++i) { 2909ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel unsigned Kind = Metadata[i].first; 2910ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel MDNode *JMD = J->getMetadata(Kind); 2911ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel MDNode *KMD = Metadata[i].second; 2912ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 2913ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel switch (Kind) { 2914ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel default: 2915ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, 0); // Remove unknown metadata 2916ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2917ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel case LLVMContext::MD_tbaa: 2918ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); 2919ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2920ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel case LLVMContext::MD_fpmath: 2921ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD)); 2922ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel break; 2923ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2924ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2925ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel } 2926ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 2927de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // This function fuses the chosen instruction pairs into vector instructions, 
2928de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // taking care preserve any needed scalar outputs and, then, it reorders the 2929de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // remaining instructions as needed (users of the first member of the pair 2930de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // need to be moved to after the location of the second member of the pair 2931de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // because the vector instruction is inserted in the location of the pair's 2932de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // second member). 2933de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel void BBVectorize::fuseChosenPairs(BasicBlock &BB, 293497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel std::vector<Value *> &PairableInsts, 293597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, Value *> &ChosenPairs, 293697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseSet<ValuePair> &FixedOrderPairs, 293797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<VPPair, unsigned> &PairConnectionTypes, 293897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, 293997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps) { 2940de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel LLVMContext& Context = BB.getContext(); 2941de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2942de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // During the vectorization process, the order of the pairs to be fused 2943de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // could be flipped. So we'll add each pair, flipped, into the ChosenPairs 2944de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // list. After a pair is fused, the flipped pair is removed from the list. 
294572465ea23d010507d3746adc126d719005981e05Hal Finkel DenseSet<ValuePair> FlippedPairs; 2946de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(), 2947de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = ChosenPairs.end(); P != E; ++P) 294872465ea23d010507d3746adc126d719005981e05Hal Finkel FlippedPairs.insert(ValuePair(P->second, P->first)); 294972465ea23d010507d3746adc126d719005981e05Hal Finkel for (DenseSet<ValuePair>::iterator P = FlippedPairs.begin(), 2950de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel E = FlippedPairs.end(); P != E; ++P) 2951de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.insert(*P); 2952de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 295397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> > LoadMoveSet; 29542f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel DenseSet<ValuePair> LoadMoveSetPairs; 29552f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel collectLoadMoveSet(BB, PairableInsts, ChosenPairs, 29562f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel LoadMoveSet, LoadMoveSetPairs); 2957de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2958de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); 2959de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2960de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { 2961de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(PI); 2962de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (P == ChosenPairs.end()) { 2963de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2964de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2965de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2966de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2967de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if 
(getDepthFactor(P->first) == 0) { 2968de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // These instructions are not really fused, but are tracked as though 2969de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // they are. Any case in which it would be interesting to fuse them 2970de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // will be taken care of by InstCombine. 2971de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel --NumFusedOps; 2972de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2973de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2974de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2975de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2976de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *I = cast<Instruction>(P->first), 2977de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel *J = cast<Instruction>(P->second); 2978de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2979de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: fusing: " << *I << 2980de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << "\n"); 2981de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2982de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Remove the pair and flipped pair from the list. 
2983de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DenseMap<Value *, Value *>::iterator FP = ChosenPairs.find(P->second); 2984de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel assert(FP != ChosenPairs.end() && "Flipped pair not found in list"); 2985de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.erase(FP); 2986de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ChosenPairs.erase(P); 2987de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 29882f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) { 2989de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: fusion of: " << *I << 2990de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " <-> " << *J << 2991de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel " aborted because of non-trivial dependency cycle\n"); 2992de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel --NumFusedOps; 2993de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 2994de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel continue; 2995de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 2996de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 2997a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel // If the pair must have the other order, then flip it. 2998a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I)); 299972465ea23d010507d3746adc126d719005981e05Hal Finkel if (!FlipPairOrder && !FixedOrderPairs.count(ValuePair(I, J))) { 300072465ea23d010507d3746adc126d719005981e05Hal Finkel // This pair does not have a fixed order, and so we might want to 300172465ea23d010507d3746adc126d719005981e05Hal Finkel // flip it if that will yield fewer shuffles. We count the number 300272465ea23d010507d3746adc126d719005981e05Hal Finkel // of dependencies connected via swaps, and those directly connected, 300372465ea23d010507d3746adc126d719005981e05Hal Finkel // and flip the order if the number of swaps is greater. 
300472465ea23d010507d3746adc126d719005981e05Hal Finkel bool OrigOrder = true; 300597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> >::iterator IJ = 300697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairDeps.find(ValuePair(I, J)); 300797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (IJ == ConnectedPairDeps.end()) { 300897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel IJ = ConnectedPairDeps.find(ValuePair(J, I)); 300972465ea23d010507d3746adc126d719005981e05Hal Finkel OrigOrder = false; 301072465ea23d010507d3746adc126d719005981e05Hal Finkel } 301172465ea23d010507d3746adc126d719005981e05Hal Finkel 301297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (IJ != ConnectedPairDeps.end()) { 301372465ea23d010507d3746adc126d719005981e05Hal Finkel unsigned NumDepsDirect = 0, NumDepsSwap = 0; 301497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (std::vector<ValuePair>::iterator T = IJ->second.begin(), 301597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel TE = IJ->second.end(); T != TE; ++T) { 301697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel VPPair Q(IJ->first, *T); 301772465ea23d010507d3746adc126d719005981e05Hal Finkel DenseMap<VPPair, unsigned>::iterator R = 301897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel PairConnectionTypes.find(VPPair(Q.second, Q.first)); 301972465ea23d010507d3746adc126d719005981e05Hal Finkel assert(R != PairConnectionTypes.end() && 302072465ea23d010507d3746adc126d719005981e05Hal Finkel "Cannot find pair connection type"); 302172465ea23d010507d3746adc126d719005981e05Hal Finkel if (R->second == PairConnectionDirect) 302272465ea23d010507d3746adc126d719005981e05Hal Finkel ++NumDepsDirect; 302372465ea23d010507d3746adc126d719005981e05Hal Finkel else if (R->second == PairConnectionSwap) 302472465ea23d010507d3746adc126d719005981e05Hal Finkel ++NumDepsSwap; 302572465ea23d010507d3746adc126d719005981e05Hal Finkel } 302672465ea23d010507d3746adc126d719005981e05Hal Finkel 
302772465ea23d010507d3746adc126d719005981e05Hal Finkel if (!OrigOrder) 302872465ea23d010507d3746adc126d719005981e05Hal Finkel std::swap(NumDepsDirect, NumDepsSwap); 302972465ea23d010507d3746adc126d719005981e05Hal Finkel 303072465ea23d010507d3746adc126d719005981e05Hal Finkel if (NumDepsSwap > NumDepsDirect) { 303172465ea23d010507d3746adc126d719005981e05Hal Finkel FlipPairOrder = true; 303272465ea23d010507d3746adc126d719005981e05Hal Finkel DEBUG(dbgs() << "BBV: reordering pair: " << *I << 303372465ea23d010507d3746adc126d719005981e05Hal Finkel " <-> " << *J << "\n"); 303472465ea23d010507d3746adc126d719005981e05Hal Finkel } 303572465ea23d010507d3746adc126d719005981e05Hal Finkel } 303672465ea23d010507d3746adc126d719005981e05Hal Finkel } 3037282969ed3641ffa426e0440d3824dd219152b2d8Hal Finkel 3038202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel Instruction *L = I, *H = J; 3039a9779bfbc9ab0cf3f157453fd0afd110b04a9fdcHal Finkel if (FlipPairOrder) 3040202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel std::swap(H, L); 3041202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel 304272465ea23d010507d3746adc126d719005981e05Hal Finkel // If the pair being fused uses the opposite order from that in the pair 304372465ea23d010507d3746adc126d719005981e05Hal Finkel // connection map, then we need to flip the types. 
304497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<ValuePair, std::vector<ValuePair> >::iterator HL = 304597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel ConnectedPairs.find(ValuePair(H, L)); 304697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (HL != ConnectedPairs.end()) 304797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (std::vector<ValuePair>::iterator T = HL->second.begin(), 304897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel TE = HL->second.end(); T != TE; ++T) { 304997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel VPPair Q(HL->first, *T); 305097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(Q); 305197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel assert(R != PairConnectionTypes.end() && 305297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel "Cannot find pair connection type"); 305397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (R->second == PairConnectionDirect) 305497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel R->second = PairConnectionSwap; 305597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel else if (R->second == PairConnectionSwap) 305697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel R->second = PairConnectionDirect; 305797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel } 305872465ea23d010507d3746adc126d719005981e05Hal Finkel 305972465ea23d010507d3746adc126d719005981e05Hal Finkel bool LBeforeH = !FlipPairOrder; 3060de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel unsigned NumOperands = I->getNumOperands(); 3061de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SmallVector<Value *, 3> ReplacedOperands(NumOperands); 306272465ea23d010507d3746adc126d719005981e05Hal Finkel getReplacementInputsForPair(Context, L, H, ReplacedOperands, 306372465ea23d010507d3746adc126d719005981e05Hal Finkel LBeforeH); 3064de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3065de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Make a copy of the original 
operation, change its type to the vector 3066de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // type and replace its operands with the vector operands. 306772465ea23d010507d3746adc126d719005981e05Hal Finkel Instruction *K = L->clone(); 306872465ea23d010507d3746adc126d719005981e05Hal Finkel if (L->hasName()) 306972465ea23d010507d3746adc126d719005981e05Hal Finkel K->takeName(L); 307072465ea23d010507d3746adc126d719005981e05Hal Finkel else if (H->hasName()) 307172465ea23d010507d3746adc126d719005981e05Hal Finkel K->takeName(H); 3072de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3073de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isa<StoreInst>(K)) 3074202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel K->mutateType(getVecTypeForPair(L->getType(), H->getType())); 3075de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 307672465ea23d010507d3746adc126d719005981e05Hal Finkel combineMetadata(K, H); 3077430b9079c614cd3f45015a6516590d33742cc802Hal Finkel K->intersectOptionalDataWith(H); 3078ab4684e26fe21857d8c8bc6ba7a5234c35117c83Hal Finkel 3079de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (unsigned o = 0; o < NumOperands; ++o) 3080de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->setOperand(o, ReplacedOperands[o]); 3081de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3082de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel K->insertAfter(J); 3083de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3084de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Instruction insertion point: 3085de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *InsertionPt = K; 3086de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel Instruction *K1 = 0, *K2 = 0; 3087202d1cb8a587a9513d8bb65bf4a3d88a55132860Hal Finkel replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2); 3088de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3089f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // The use dag of the first original instruction must be moved to after 
3090f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // the location of the second instruction. The entire use dag of the 3091f64a7a83bea5f1d2ab1e71231616c6cb0487d56eHal Finkel // first instruction is disjoint from the input dag of the second 3092de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // (by definition), and so commutes with it. 3093de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 30942f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J); 3095de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3096de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (!isa<StoreInst>(I)) { 309772465ea23d010507d3746adc126d719005981e05Hal Finkel L->replaceAllUsesWith(K1); 309872465ea23d010507d3746adc126d719005981e05Hal Finkel H->replaceAllUsesWith(K2); 309972465ea23d010507d3746adc126d719005981e05Hal Finkel AA->replaceWithNewValue(L, K1); 310072465ea23d010507d3746adc126d719005981e05Hal Finkel AA->replaceWithNewValue(H, K2); 3101de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 3102de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3103de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Instructions that may read from memory may be in the load move set. 3104de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Once an instruction is fused, we no longer need its move set, and so 3105de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // the values of the map never need to be updated. However, when a load 3106de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // is fused, we need to merge the entries from both instructions in the 3107de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // pair in case those instructions were in the move set of some other 3108de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // yet-to-be-fused pair. The loads in question are the keys of the map. 
3109de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (I->mayReadFromMemory()) { 3110de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel std::vector<ValuePair> NewSetMembers; 311197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> >::iterator II = 311297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel LoadMoveSet.find(I); 311397a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (II != LoadMoveSet.end()) 311497a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (std::vector<Value *>::iterator N = II->second.begin(), 311597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel NE = II->second.end(); N != NE; ++N) 311697a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel NewSetMembers.push_back(ValuePair(K, *N)); 311797a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel DenseMap<Value *, std::vector<Value *> >::iterator JJ = 311897a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel LoadMoveSet.find(J); 311997a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel if (JJ != LoadMoveSet.end()) 312097a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel for (std::vector<Value *>::iterator N = JJ->second.begin(), 312197a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel NE = JJ->second.end(); N != NE; ++N) 312297a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel NewSetMembers.push_back(ValuePair(K, *N)); 3123de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(), 31242f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel AE = NewSetMembers.end(); A != AE; ++A) { 312597a241b173a1413df5a93fdd891ddfac36dabad9Hal Finkel LoadMoveSet[A->first].push_back(A->second); 31262f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel LoadMoveSetPairs.insert(*A); 31272f0e63cc16feb39480805bd00f53bbe5e3031d29Hal Finkel } 3128de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 3129de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3130de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel // Before removing I, set the iterator to the 
next instruction. 3131de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel PI = llvm::next(BasicBlock::iterator(I)); 3132de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel if (cast<Instruction>(PI) == J) 3133de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel ++PI; 3134de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3135de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE->forgetValue(I); 3136de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel SE->forgetValue(J); 3137de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel I->eraseFromParent(); 3138de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel J->eraseFromParent(); 313972465ea23d010507d3746adc126d719005981e05Hal Finkel 314072465ea23d010507d3746adc126d719005981e05Hal Finkel DEBUG(if (PrintAfterEveryPair) dbgs() << "BBV: block is now: \n" << 314172465ea23d010507d3746adc126d719005981e05Hal Finkel BB << "\n"); 3142de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 3143de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3144de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel DEBUG(dbgs() << "BBV: final: \n" << BB << "\n"); 3145de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel } 3146de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel} 3147de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3148de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelchar BBVectorize::ID = 0; 3149de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkelstatic const char bb_vectorize_name[] = "Basic-Block Vectorization"; 3150de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) 3151de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 31528bd6c52396ab6e7955fdcc1bce099b7cba29a308Chandler CarruthINITIALIZE_AG_DEPENDENCY(TargetTransformInfo) 3153e29c19091cca58db668407dfc5dd86c70e8b3d49Hal FinkelINITIALIZE_PASS_DEPENDENCY(DominatorTree) 3154de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 
3155de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal FinkelINITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) 3156de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3157bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengBasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) { 3158bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng return new BBVectorize(C); 3159de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel} 3160de5e5ec3045a73a06b1054417f9ac6c02929e9ceHal Finkel 3161bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengbool 3162bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zhengllvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) { 3163bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng BBVectorize BBVectorizer(P, C); 316487825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng return BBVectorizer.vectorizeBB(BB); 316587825e7970a361ce5a8bab19bc880ff7f6242ca2Hongbin Zheng} 3166bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng 3167bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng//===----------------------------------------------------------------------===// 3168bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin ZhengVectorizeConfig::VectorizeConfig() { 3169bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng VectorBits = ::VectorBits; 3170768edf3cd037aab10391abc279f71470df8e3156Hal Finkel VectorizeBools = !::NoBools; 317186312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeInts = !::NoInts; 317286312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeFloats = !::NoFloats; 3173f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel VectorizePointers = !::NoPointers; 317486312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeCasts = !::NoCasts; 317586312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeMath = !::NoMath; 317686312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeFMA = !::NoFMA; 3177fc3665c87519850f629c9565535e3be447e10addHal Finkel 
VectorizeSelect = !::NoSelect; 3178e415f96b6a43ac8861148a11a4258bc38c247e8fHal Finkel VectorizeCmp = !::NoCmp; 3179f3f5a1e6f77a842ccb24cc81766437da5197d712Hal Finkel VectorizeGEP = !::NoGEP; 318086312cc15f29ce2bbd9647b94862e068045280c3Hongbin Zheng VectorizeMemOps = !::NoMemOps; 3181bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng AlignedOnly = ::AlignedOnly; 3182bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng ReqChainDepth= ::ReqChainDepth; 3183bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng SearchLimit = ::SearchLimit; 3184bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck; 3185bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng SplatBreaksChain = ::SplatBreaksChain; 3186bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxInsts = ::MaxInsts; 3187ab90084bca42b74a5b5edad9b416bd81e105dad0Hal Finkel MaxPairs = ::MaxPairs; 3188bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng MaxIter = ::MaxIter; 318964e1b28643d87e70734deb5f3d2d298e859c2fd2Hal Finkel Pow2LenOnly = ::Pow2LenOnly; 3190bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng NoMemOpBoost = ::NoMemOpBoost; 3191bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng FastDep = ::FastDep; 3192bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1Hongbin Zheng} 3193