1//===- NaryReassociate.h - Reassociate n-ary expressions ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass reassociates n-ary add expressions and eliminates the redundancy 11// exposed by the reassociation. 12// 13// A motivating example: 14// 15// void foo(int a, int b) { 16// bar(a + b); 17// bar((a + 2) + b); 18// } 19// 20// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify 21// the above code to 22// 23// int t = a + b; 24// bar(t); 25// bar(t + 2); 26// 27// However, the Reassociate pass is unable to do that because it processes each 28// instruction individually and believes (a + 2) + b is the best form according 29// to its rank system. 30// 31// To address this limitation, NaryReassociate reassociates an expression in a 32// form that reuses existing instructions. As a result, NaryReassociate can 33// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that 34// (a + b) is computed before. 35// 36// NaryReassociate works as follows. For every instruction in the form of (a + 37// b) + c, it checks whether a + c or b + c is already computed by a dominating 38// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b + 39// c) + a and removes the redundancy accordingly. To efficiently look up whether 40// an expression is computed before, we store each instruction seen and its SCEV 41// into an SCEV-to-instruction map. 42// 43// Although the algorithm pattern-matches only ternary additions, it 44// automatically handles many >3-ary expressions by walking through the function 45// in the depth-first order. For example, given 46// 47// (a + c) + d 48// ((a + b) + c) + d 49// 50// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites 51// ((a + c) + b) + d into ((a + c) + d) + b. 52// 53// Finally, the above dominator-based algorithm may need to be run multiple 54// iterations before emitting optimal code. One source of this need is that we 55// only split an operand when it is used only once. The above algorithm can 56// eliminate an instruction and decrease the usage count of its operands. As a 57// result, an instruction that previously had multiple uses may become a 58// single-use instruction and thus eligible for split consideration. For 59// example, 60// 61// ac = a + c 62// ab = a + b 63// abc = ab + c 64// ab2 = ab + b 65// ab2c = ab2 + c 66// 67// In the first iteration, we cannot reassociate abc to ac+b because ab is used 68// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a 69// result, ab2 becomes dead and ab will be used only once in the second 70// iteration. 71// 72// Limitations and TODO items: 73// 74// 1) We only considers n-ary adds and muls for now. This should be extended 75// and generalized. 76// 77//===----------------------------------------------------------------------===// 78 79#ifndef LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H 80#define LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H 81 82#include "llvm/ADT/DenseMap.h" 83#include "llvm/ADT/SmallVector.h" 84#include "llvm/Analysis/AssumptionCache.h" 85#include "llvm/Analysis/ScalarEvolution.h" 86#include "llvm/Analysis/TargetLibraryInfo.h" 87#include "llvm/Analysis/TargetTransformInfo.h" 88#include "llvm/IR/Dominators.h" 89#include "llvm/IR/Function.h" 90#include "llvm/IR/PassManager.h" 91 92namespace llvm { 93class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> { 94public: 95 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 96 97 // Glue for old PM. 98 bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, 99 ScalarEvolution *SE_, TargetLibraryInfo *TLI_, 100 TargetTransformInfo *TTI_); 101 102private: 103 // Runs only one iteration of the dominator-based algorithm. See the header 104 // comments for why we need multiple iterations. 105 bool doOneIteration(Function &F); 106 107 // Reassociates I for better CSE. 108 Instruction *tryReassociate(Instruction *I); 109 110 // Reassociate GEP for better CSE. 111 Instruction *tryReassociateGEP(GetElementPtrInst *GEP); 112 // Try splitting GEP at the I-th index and see whether either part can be 113 // CSE'ed. This is a helper function for tryReassociateGEP. 114 // 115 // \p IndexedType The element type indexed by GEP's I-th index. This is 116 // equivalent to 117 // GEP->getIndexedType(GEP->getPointerOperand(), 0-th index, 118 // ..., i-th index). 119 GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, 120 unsigned I, Type *IndexedType); 121 // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or 122 // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly. 123 GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, 124 unsigned I, Value *LHS, 125 Value *RHS, Type *IndexedType); 126 127 // Reassociate binary operators for better CSE. 128 Instruction *tryReassociateBinaryOp(BinaryOperator *I); 129 130 // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly 131 // passed. 132 Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS, 133 BinaryOperator *I); 134 // Rewrites I to (LHS op RHS) if LHS is computed already. 135 Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS, 136 BinaryOperator *I); 137 138 // Tries to match Op1 and Op2 by using V. 139 bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2); 140 141 // Gets SCEV for (LHS op RHS). 142 const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS, 143 const SCEV *RHS); 144 145 // Returns the closest dominator of \c Dominatee that computes 146 // \c CandidateExpr. Returns null if not found. 147 Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr, 148 Instruction *Dominatee); 149 // GetElementPtrInst implicitly sign-extends an index if the index is shorter 150 // than the pointer size. This function returns whether Index is shorter than 151 // GEP's pointer size, i.e., whether Index needs to be sign-extended in order 152 // to be an index of GEP. 153 bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP); 154 155 AssumptionCache *AC; 156 const DataLayout *DL; 157 DominatorTree *DT; 158 ScalarEvolution *SE; 159 TargetLibraryInfo *TLI; 160 TargetTransformInfo *TTI; 161 // A lookup table quickly telling which instructions compute the given SCEV. 162 // Note that there can be multiple instructions at different locations 163 // computing to the same SCEV, so we map a SCEV to an instruction list. For 164 // example, 165 // 166 // if (p1) 167 // foo(a + b); 168 // if (p2) 169 // bar(a + b); 170 DenseMap<const SCEV *, SmallVector<WeakTrackingVH, 2>> SeenExprs; 171}; 172} // namespace llvm 173 174#endif // LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H 175