Reassociate.cpp revision dac5dbadeb840ddded4665d144f31c5f88494d6e
145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org//===- Reassociate.cpp - Reassociate binary expressions -------------------===// 245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// 345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// The LLVM Compiler Infrastructure 445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// 545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// This file is distributed under the University of Illinois Open Source 645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// License. See LICENSE.TXT for details. 745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// 845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org//===----------------------------------------------------------------------===// 945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// 1045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// This pass reassociates commutative expressions in an order that is designed 1145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// to promote better constant propagation, GCSE, LICM, PRE, etc. 1245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// 1345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// For example: 4 + (x + 5) -> x + (4 + 5) 1445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// 1545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// In the implementation of this algorithm, constants are assigned rank = 0, 1645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// function arguments are rank = 1, and other values are assigned ranks 1745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// corresponding to the reverse post order traversal of current function 1845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// (starting at 2), which effectively gives values in deep loops higher rank 1945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// than values not in loops. 2045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// 2145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org//===----------------------------------------------------------------------===// 2245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 2345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#define DEBUG_TYPE "reassociate" 2445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/Transforms/Scalar.h" 2545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/Transforms/Utils/Local.h" 2645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/Constants.h" 2745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/DerivedTypes.h" 2845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/Function.h" 2945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/Instructions.h" 3045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/IntrinsicInst.h" 3145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/Pass.h" 3245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/Assembly/Writer.h" 3345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/Support/CFG.h" 3445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/Support/Debug.h" 3545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/Support/ValueHandle.h" 3645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/Support/raw_ostream.h" 3745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/ADT/PostOrderIterator.h" 3845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/ADT/Statistic.h" 3945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include "llvm/ADT/DenseMap.h" 4045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#include <algorithm> 4145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgusing namespace llvm; 4245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 4345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgSTATISTIC(NumLinear , "Number of insts linearized"); 4445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgSTATISTIC(NumChanged, "Number of insts reassociated"); 4545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgSTATISTIC(NumAnnihil, "Number of expr tree annihilated"); 4645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgSTATISTIC(NumFactor , "Number of multiplies factored"); 4745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 4845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgnamespace { 4945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org struct ValueEntry { 5045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org unsigned Rank; 5145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org Value *Op; 5245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {} 5345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org }; 5445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) { 5545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start. 5645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org } 5745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org} 5845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 5945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#ifndef NDEBUG 6045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org/// PrintOps - Print out the expression identified in the Ops list. 6145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org/// 6245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgstatic void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) { 6345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org Module *M = I->getParent()->getParent()->getParent(); 6445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " " 6545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org << *Ops[0].Op->getType() << '\t'; 6645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 6745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org dbgs() << "[ "; 6845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org WriteAsOperand(dbgs(), Ops[i].Op, false, M); 6945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org dbgs() << ", #" << Ops[i].Rank << "] "; 7045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org } 7145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org} 7245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org#endif 7345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 7445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgnamespace { 7545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org class Reassociate : public FunctionPass { 7645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org DenseMap<BasicBlock*, unsigned> RankMap; 7745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org DenseMap<AssertingVH<>, unsigned> ValueRankMap; 7845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org SmallVector<WeakVH, 8> RedoInsts; 7945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org SmallVector<WeakVH, 8> DeadInsts; 8045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org bool MadeChange; 8145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org public: 8245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org static char ID; // Pass identification, replacement for typeid 8345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org Reassociate() : FunctionPass(ID) { 8445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org initializeReassociatePass(*PassRegistry::getPassRegistry()); 8545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org } 8645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 8745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org bool runOnFunction(Function &F); 8845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 8945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org virtual void getAnalysisUsage(AnalysisUsage &AU) const { 9045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org AU.setPreservesCFG(); 9145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org } 9245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org private: 9345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org void BuildRankMap(Function &F); 9445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org unsigned getRank(Value *V); 9545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org Value *ReassociateExpression(BinaryOperator *I); 9645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops, 9745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org unsigned Idx = 0); 9845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org Value *OptimizeExpression(BinaryOperator *I, 9945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org SmallVectorImpl<ValueEntry> &Ops); 10045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); 10145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); 10245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org void LinearizeExpr(BinaryOperator *I); 10345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org Value *RemoveFactorFromExpression(Value *V, Value *Factor); 10445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org void ReassociateInst(BasicBlock::iterator &BBI); 10545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 10645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org void RemoveDeadBinaryOp(Value *V); 10745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org }; 10845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org} 10945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 11045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgchar Reassociate::ID = 0; 11145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgINITIALIZE_PASS(Reassociate, "reassociate", 11245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org "Reassociate expressions", false, false) 11345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 11445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org// Public interface to the Reassociate pass 11545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgFunctionPass *llvm::createReassociatePass() { return new Reassociate(); } 11645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 11745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgvoid Reassociate::RemoveDeadBinaryOp(Value *V) { 11845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org Instruction *Op = dyn_cast<Instruction>(V); 11945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org if (!Op || !isa<BinaryOperator>(Op)) 12045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org return; 12145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 12245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1); 12345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 12445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org ValueRankMap.erase(Op); 12545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org DeadInsts.push_back(Op); 12645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org RemoveDeadBinaryOp(LHS); 12745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org RemoveDeadBinaryOp(RHS); 12845afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org} 12945afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 13045afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org 13145afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.orgstatic bool isUnmovableInstruction(Instruction *I) { 13245afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org if (I->getOpcode() == Instruction::PHI || 13345afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org I->getOpcode() == Instruction::Alloca || 13445afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org I->getOpcode() == Instruction::Load || 13545afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org I->getOpcode() == Instruction::Invoke || 13645afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org (I->getOpcode() == Instruction::Call && 13745afe016bed87b9c6946184709058b39ede3f77ajwong@chromium.org !isa<DbgInfoIntrinsic>(I)) || 138 I->getOpcode() == Instruction::UDiv || 139 I->getOpcode() == Instruction::SDiv || 140 I->getOpcode() == Instruction::FDiv || 141 I->getOpcode() == Instruction::URem || 142 I->getOpcode() == Instruction::SRem || 143 I->getOpcode() == Instruction::FRem) 144 return true; 145 return false; 146} 147 148void Reassociate::BuildRankMap(Function &F) { 149 unsigned i = 2; 150 151 // Assign distinct ranks to function arguments 152 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) 153 ValueRankMap[&*I] = ++i; 154 155 ReversePostOrderTraversal<Function*> RPOT(&F); 156 for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(), 157 E = RPOT.end(); I != E; ++I) { 158 BasicBlock *BB = *I; 159 unsigned BBRank = RankMap[BB] = ++i << 16; 160 161 // Walk the basic block, adding precomputed ranks for any instructions that 162 // we cannot move. This ensures that the ranks for these instructions are 163 // all different in the block. 164 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) 165 if (isUnmovableInstruction(I)) 166 ValueRankMap[&*I] = ++BBRank; 167 } 168} 169 170unsigned Reassociate::getRank(Value *V) { 171 Instruction *I = dyn_cast<Instruction>(V); 172 if (I == 0) { 173 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument. 174 return 0; // Otherwise it's a global or constant, rank 0. 175 } 176 177 if (unsigned Rank = ValueRankMap[I]) 178 return Rank; // Rank already known? 179 180 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that 181 // we can reassociate expressions for code motion! Since we do not recurse 182 // for PHI nodes, we cannot have infinite recursion here, because there 183 // cannot be loops in the value graph that do not go through PHI nodes. 184 unsigned Rank = 0, MaxRank = RankMap[I->getParent()]; 185 for (unsigned i = 0, e = I->getNumOperands(); 186 i != e && Rank != MaxRank; ++i) 187 Rank = std::max(Rank, getRank(I->getOperand(i))); 188 189 // If this is a not or neg instruction, do not count it for rank. This 190 // assures us that X and ~X will have the same rank. 191 if (!I->getType()->isIntegerTy() || 192 (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) 193 ++Rank; 194 195 //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " 196 // << Rank << "\n"); 197 198 return ValueRankMap[I] = Rank; 199} 200 201/// isReassociableOp - Return true if V is an instruction of the specified 202/// opcode and if it only has one use. 203static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { 204 if ((V->hasOneUse() || V->use_empty()) && isa<Instruction>(V) && 205 cast<Instruction>(V)->getOpcode() == Opcode) 206 return cast<BinaryOperator>(V); 207 return 0; 208} 209 210/// LowerNegateToMultiply - Replace 0-X with X*-1. 211/// 212static Instruction *LowerNegateToMultiply(Instruction *Neg, 213 DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { 214 Constant *Cst = Constant::getAllOnesValue(Neg->getType()); 215 216 Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); 217 ValueRankMap.erase(Neg); 218 Res->takeName(Neg); 219 Neg->replaceAllUsesWith(Res); 220 Neg->eraseFromParent(); 221 return Res; 222} 223 224// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'. 225// Note that if D is also part of the expression tree that we recurse to 226// linearize it as well. Besides that case, this does not recurse into A,B, or 227// C. 228void Reassociate::LinearizeExpr(BinaryOperator *I) { 229 BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0)); 230 BinaryOperator *RHS = cast<BinaryOperator>(I->getOperand(1)); 231 assert(isReassociableOp(LHS, I->getOpcode()) && 232 isReassociableOp(RHS, I->getOpcode()) && 233 "Not an expression that needs linearization?"); 234 235 DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n'); 236 237 // Move the RHS instruction to live immediately before I, avoiding breaking 238 // dominator properties. 239 RHS->moveBefore(I); 240 241 // Move operands around to do the linearization. 242 I->setOperand(1, RHS->getOperand(0)); 243 RHS->setOperand(0, LHS); 244 I->setOperand(0, RHS); 245 246 // Conservatively clear all the optional flags, which may not hold 247 // after the reassociation. 248 I->clearSubclassOptionalData(); 249 LHS->clearSubclassOptionalData(); 250 RHS->clearSubclassOptionalData(); 251 252 ++NumLinear; 253 MadeChange = true; 254 DEBUG(dbgs() << "Linearized: " << *I << '\n'); 255 256 // If D is part of this expression tree, tail recurse. 257 if (isReassociableOp(I->getOperand(1), I->getOpcode())) 258 LinearizeExpr(I); 259} 260 261 262/// LinearizeExprTree - Given an associative binary expression tree, traverse 263/// all of the uses putting it into canonical form. This forces a left-linear 264/// form of the expression (((a+b)+c)+d), and collects information about the 265/// rank of the non-tree operands. 266/// 267/// NOTE: These intentionally destroys the expression tree operands (turning 268/// them into undef values) to reduce #uses of the values. This means that the 269/// caller MUST use something like RewriteExprTree to put the values back in. 270/// 271void Reassociate::LinearizeExprTree(BinaryOperator *I, 272 SmallVectorImpl<ValueEntry> &Ops) { 273 Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); 274 unsigned Opcode = I->getOpcode(); 275 276 // First step, linearize the expression if it is in ((A+B)+(C+D)) form. 277 BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode); 278 BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode); 279 280 // If this is a multiply expression tree and it contains internal negations, 281 // transform them into multiplies by -1 so they can be reassociated. 282 if (I->getOpcode() == Instruction::Mul) { 283 if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) { 284 LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap); 285 LHSBO = isReassociableOp(LHS, Opcode); 286 } 287 if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) { 288 RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap); 289 RHSBO = isReassociableOp(RHS, Opcode); 290 } 291 } 292 293 if (!LHSBO) { 294 if (!RHSBO) { 295 // Neither the LHS or RHS as part of the tree, thus this is a leaf. As 296 // such, just remember these operands and their rank. 297 Ops.push_back(ValueEntry(getRank(LHS), LHS)); 298 Ops.push_back(ValueEntry(getRank(RHS), RHS)); 299 300 // Clear the leaves out. 301 I->setOperand(0, UndefValue::get(I->getType())); 302 I->setOperand(1, UndefValue::get(I->getType())); 303 return; 304 } 305 306 // Turn X+(Y+Z) -> (Y+Z)+X 307 std::swap(LHSBO, RHSBO); 308 std::swap(LHS, RHS); 309 bool Success = !I->swapOperands(); 310 assert(Success && "swapOperands failed"); 311 Success = false; 312 MadeChange = true; 313 } else if (RHSBO) { 314 // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not 315 // part of the expression tree. 316 LinearizeExpr(I); 317 LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0)); 318 RHS = I->getOperand(1); 319 RHSBO = 0; 320 } 321 322 // Okay, now we know that the LHS is a nested expression and that the RHS is 323 // not. Perform reassociation. 324 assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!"); 325 326 // Move LHS right before I to make sure that the tree expression dominates all 327 // values. 328 LHSBO->moveBefore(I); 329 330 // Linearize the expression tree on the LHS. 331 LinearizeExprTree(LHSBO, Ops); 332 333 // Remember the RHS operand and its rank. 334 Ops.push_back(ValueEntry(getRank(RHS), RHS)); 335 336 // Clear the RHS leaf out. 337 I->setOperand(1, UndefValue::get(I->getType())); 338} 339 340// RewriteExprTree - Now that the operands for this expression tree are 341// linearized and optimized, emit them in-order. This function is written to be 342// tail recursive. 343void Reassociate::RewriteExprTree(BinaryOperator *I, 344 SmallVectorImpl<ValueEntry> &Ops, 345 unsigned i) { 346 if (i+2 == Ops.size()) { 347 if (I->getOperand(0) != Ops[i].Op || 348 I->getOperand(1) != Ops[i+1].Op) { 349 Value *OldLHS = I->getOperand(0); 350 DEBUG(dbgs() << "RA: " << *I << '\n'); 351 I->setOperand(0, Ops[i].Op); 352 I->setOperand(1, Ops[i+1].Op); 353 354 // Clear all the optional flags, which may not hold after the 355 // reassociation if the expression involved more than just this operation. 356 if (Ops.size() != 2) 357 I->clearSubclassOptionalData(); 358 359 DEBUG(dbgs() << "TO: " << *I << '\n'); 360 MadeChange = true; 361 ++NumChanged; 362 363 // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3) 364 // delete the extra, now dead, nodes. 365 RemoveDeadBinaryOp(OldLHS); 366 } 367 return; 368 } 369 assert(i+2 < Ops.size() && "Ops index out of range!"); 370 371 if (I->getOperand(1) != Ops[i].Op) { 372 DEBUG(dbgs() << "RA: " << *I << '\n'); 373 I->setOperand(1, Ops[i].Op); 374 375 // Conservatively clear all the optional flags, which may not hold 376 // after the reassociation. 377 I->clearSubclassOptionalData(); 378 379 DEBUG(dbgs() << "TO: " << *I << '\n'); 380 MadeChange = true; 381 ++NumChanged; 382 } 383 384 BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0)); 385 assert(LHS->getOpcode() == I->getOpcode() && 386 "Improper expression tree!"); 387 388 // Compactify the tree instructions together with each other to guarantee 389 // that the expression tree is dominated by all of Ops. 390 LHS->moveBefore(I); 391 RewriteExprTree(LHS, Ops, i+1); 392} 393 394 395 396// NegateValue - Insert instructions before the instruction pointed to by BI, 397// that computes the negative version of the value specified. The negative 398// version of the value is returned, and BI is left pointing at the instruction 399// that should be processed next by the reassociation pass. 400// 401static Value *NegateValue(Value *V, Instruction *BI) { 402 if (Constant *C = dyn_cast<Constant>(V)) 403 return ConstantExpr::getNeg(C); 404 405 // We are trying to expose opportunity for reassociation. One of the things 406 // that we want to do to achieve this is to push a negation as deep into an 407 // expression chain as possible, to expose the add instructions. In practice, 408 // this means that we turn this: 409 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D 410 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate 411 // the constants. We assume that instcombine will clean up the mess later if 412 // we introduce tons of unnecessary negation instructions. 413 // 414 if (Instruction *I = dyn_cast<Instruction>(V)) 415 if (I->getOpcode() == Instruction::Add && I->hasOneUse()) { 416 // Push the negates through the add. 417 I->setOperand(0, NegateValue(I->getOperand(0), BI)); 418 I->setOperand(1, NegateValue(I->getOperand(1), BI)); 419 420 // We must move the add instruction here, because the neg instructions do 421 // not dominate the old add instruction in general. By moving it, we are 422 // assured that the neg instructions we just inserted dominate the 423 // instruction we are about to insert after them. 424 // 425 I->moveBefore(BI); 426 I->setName(I->getName()+".neg"); 427 return I; 428 } 429 430 // Okay, we need to materialize a negated version of V with an instruction. 431 // Scan the use lists of V to see if we have one already. 432 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 433 User *U = *UI; 434 if (!BinaryOperator::isNeg(U)) continue; 435 436 // We found one! Now we have to make sure that the definition dominates 437 // this use. We do this by moving it to the entry block (if it is a 438 // non-instruction value) or right after the definition. These negates will 439 // be zapped by reassociate later, so we don't need much finesse here. 440 BinaryOperator *TheNeg = cast<BinaryOperator>(U); 441 442 // Verify that the negate is in this function, V might be a constant expr. 443 if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) 444 continue; 445 446 BasicBlock::iterator InsertPt; 447 if (Instruction *InstInput = dyn_cast<Instruction>(V)) { 448 if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) { 449 InsertPt = II->getNormalDest()->begin(); 450 } else { 451 InsertPt = InstInput; 452 ++InsertPt; 453 } 454 while (isa<PHINode>(InsertPt)) ++InsertPt; 455 } else { 456 InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin(); 457 } 458 TheNeg->moveBefore(InsertPt); 459 return TheNeg; 460 } 461 462 // Insert a 'neg' instruction that subtracts the value from zero to get the 463 // negation. 464 return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI); 465} 466 467/// ShouldBreakUpSubtract - Return true if we should break up this subtract of 468/// X-Y into (X + -Y). 469static bool ShouldBreakUpSubtract(Instruction *Sub) { 470 // If this is a negation, we can't split it up! 471 if (BinaryOperator::isNeg(Sub)) 472 return false; 473 474 // Don't bother to break this up unless either the LHS is an associable add or 475 // subtract or if this is only used by one. 476 if (isReassociableOp(Sub->getOperand(0), Instruction::Add) || 477 isReassociableOp(Sub->getOperand(0), Instruction::Sub)) 478 return true; 479 if (isReassociableOp(Sub->getOperand(1), Instruction::Add) || 480 isReassociableOp(Sub->getOperand(1), Instruction::Sub)) 481 return true; 482 if (Sub->hasOneUse() && 483 (isReassociableOp(Sub->use_back(), Instruction::Add) || 484 isReassociableOp(Sub->use_back(), Instruction::Sub))) 485 return true; 486 487 return false; 488} 489 490/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is 491/// only used by an add, transform this into (X+(0-Y)) to promote better 492/// reassociation. 493static Instruction *BreakUpSubtract(Instruction *Sub, 494 DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { 495 // Convert a subtract into an add and a neg instruction. This allows sub 496 // instructions to be commuted with other add instructions. 497 // 498 // Calculate the negative value of Operand 1 of the sub instruction, 499 // and set it as the RHS of the add instruction we just made. 500 // 501 Value *NegVal = NegateValue(Sub->getOperand(1), Sub); 502 Instruction *New = 503 BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub); 504 New->takeName(Sub); 505 506 // Everyone now refers to the add instruction. 507 ValueRankMap.erase(Sub); 508 Sub->replaceAllUsesWith(New); 509 Sub->eraseFromParent(); 510 511 DEBUG(dbgs() << "Negated: " << *New << '\n'); 512 return New; 513} 514 515/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used 516/// by one, change this into a multiply by a constant to assist with further 517/// reassociation. 518static Instruction *ConvertShiftToMul(Instruction *Shl, 519 DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { 520 // If an operand of this shift is a reassociable multiply, or if the shift 521 // is used by a reassociable multiply or add, turn into a multiply. 522 if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) || 523 (Shl->hasOneUse() && 524 (isReassociableOp(Shl->use_back(), Instruction::Mul) || 525 isReassociableOp(Shl->use_back(), Instruction::Add)))) { 526 Constant *MulCst = ConstantInt::get(Shl->getType(), 1); 527 MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1))); 528 529 Instruction *Mul = 530 BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl); 531 ValueRankMap.erase(Shl); 532 Mul->takeName(Shl); 533 Shl->replaceAllUsesWith(Mul); 534 Shl->eraseFromParent(); 535 return Mul; 536 } 537 return 0; 538} 539 540// Scan backwards and forwards among values with the same rank as element i to 541// see if X exists. If X does not exist, return i. This is useful when 542// scanning for 'x' when we see '-x' because they both get the same rank. 543static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i, 544 Value *X) { 545 unsigned XRank = Ops[i].Rank; 546 unsigned e = Ops.size(); 547 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) 548 if (Ops[j].Op == X) 549 return j; 550 // Scan backwards. 551 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) 552 if (Ops[j].Op == X) 553 return j; 554 return i; 555} 556 557/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together 558/// and returning the result. Insert the tree before I. 559static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<Value*> &Ops){ 560 if (Ops.size() == 1) return Ops.back(); 561 562 Value *V1 = Ops.back(); 563 Ops.pop_back(); 564 Value *V2 = EmitAddTreeOfValues(I, Ops); 565 return BinaryOperator::CreateAdd(V2, V1, "tmp", I); 566} 567 568/// RemoveFactorFromExpression - If V is an expression tree that is a 569/// multiplication sequence, and if this sequence contains a multiply by Factor, 570/// remove Factor from the tree and return the new tree. 571Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { 572 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); 573 if (!BO) return 0; 574 575 SmallVector<ValueEntry, 8> Factors; 576 LinearizeExprTree(BO, Factors); 577 578 bool FoundFactor = false; 579 bool NeedsNegate = false; 580 for (unsigned i = 0, e = Factors.size(); i != e; ++i) { 581 if (Factors[i].Op == Factor) { 582 FoundFactor = true; 583 Factors.erase(Factors.begin()+i); 584 break; 585 } 586 587 // If this is a negative version of this factor, remove it. 588 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) 589 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op)) 590 if (FC1->getValue() == -FC2->getValue()) { 591 FoundFactor = NeedsNegate = true; 592 Factors.erase(Factors.begin()+i); 593 break; 594 } 595 } 596 597 if (!FoundFactor) { 598 // Make sure to restore the operands to the expression tree. 599 RewriteExprTree(BO, Factors); 600 return 0; 601 } 602 603 BasicBlock::iterator InsertPt = BO; ++InsertPt; 604 605 // If this was just a single multiply, remove the multiply and return the only 606 // remaining operand. 607 if (Factors.size() == 1) { 608 ValueRankMap.erase(BO); 609 DeadInsts.push_back(BO); 610 V = Factors[0].Op; 611 } else { 612 RewriteExprTree(BO, Factors); 613 V = BO; 614 } 615 616 if (NeedsNegate) 617 V = BinaryOperator::CreateNeg(V, "neg", InsertPt); 618 619 return V; 620} 621 622/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively 623/// add its operands as factors, otherwise add V to the list of factors. 624/// 625/// Ops is the top-level list of add operands we're trying to factor. 626static void FindSingleUseMultiplyFactors(Value *V, 627 SmallVectorImpl<Value*> &Factors, 628 const SmallVectorImpl<ValueEntry> &Ops, 629 bool IsRoot) { 630 BinaryOperator *BO; 631 if (!(V->hasOneUse() || V->use_empty()) || // More than one use. 632 !(BO = dyn_cast<BinaryOperator>(V)) || 633 BO->getOpcode() != Instruction::Mul) { 634 Factors.push_back(V); 635 return; 636 } 637 638 // If this value has a single use because it is another input to the add 639 // tree we're reassociating and we dropped its use, it actually has two 640 // uses and we can't factor it. 641 if (!IsRoot) { 642 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 643 if (Ops[i].Op == V) { 644 Factors.push_back(V); 645 return; 646 } 647 } 648 649 650 // Otherwise, add the LHS and RHS to the list of factors. 651 FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false); 652 FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false); 653} 654 655/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor' 656/// instruction. This optimizes based on identities. If it can be reduced to 657/// a single Value, it is returned, otherwise the Ops list is mutated as 658/// necessary. 659static Value *OptimizeAndOrXor(unsigned Opcode, 660 SmallVectorImpl<ValueEntry> &Ops) { 661 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs. 662 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1. 663 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 664 // First, check for X and ~X in the operand list. 665 assert(i < Ops.size()); 666 if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^. 667 Value *X = BinaryOperator::getNotArgument(Ops[i].Op); 668 unsigned FoundX = FindInOperandList(Ops, i, X); 669 if (FoundX != i) { 670 if (Opcode == Instruction::And) // ...&X&~X = 0 671 return Constant::getNullValue(X->getType()); 672 673 if (Opcode == Instruction::Or) // ...|X|~X = -1 674 return Constant::getAllOnesValue(X->getType()); 675 } 676 } 677 678 // Next, check for duplicate pairs of values, which we assume are next to 679 // each other, due to our sorting criteria. 680 assert(i < Ops.size()); 681 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) { 682 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 683 // Drop duplicate values for And and Or. 684 Ops.erase(Ops.begin()+i); 685 --i; --e; 686 ++NumAnnihil; 687 continue; 688 } 689 690 // Drop pairs of values for Xor. 691 assert(Opcode == Instruction::Xor); 692 if (e == 2) 693 return Constant::getNullValue(Ops[0].Op->getType()); 694 695 // Y ^ X^X -> Y 696 Ops.erase(Ops.begin()+i, Ops.begin()+i+2); 697 i -= 1; e -= 2; 698 ++NumAnnihil; 699 } 700 } 701 return 0; 702} 703 704/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This 705/// optimizes based on identities. If it can be reduced to a single Value, it 706/// is returned, otherwise the Ops list is mutated as necessary. 707Value *Reassociate::OptimizeAdd(Instruction *I, 708 SmallVectorImpl<ValueEntry> &Ops) { 709 // Scan the operand lists looking for X and -X pairs. If we find any, we 710 // can simplify the expression. X+-X == 0. While we're at it, scan for any 711 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z. 712 // 713 // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1". 714 // 715 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 716 Value *TheOp = Ops[i].Op; 717 // Check to see if we've seen this operand before. If so, we factor all 718 // instances of the operand together. Due to our sorting criteria, we know 719 // that these need to be next to each other in the vector. 720 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) { 721 // Rescan the list, remove all instances of this operand from the expr. 722 unsigned NumFound = 0; 723 do { 724 Ops.erase(Ops.begin()+i); 725 ++NumFound; 726 } while (i != Ops.size() && Ops[i].Op == TheOp); 727 728 DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n'); 729 ++NumFactor; 730 731 // Insert a new multiply. 732 Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound); 733 Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I); 734 735 // Now that we have inserted a multiply, optimize it. This allows us to 736 // handle cases that require multiple factoring steps, such as this: 737 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 738 RedoInsts.push_back(Mul); 739 740 // If every add operand was a duplicate, return the multiply. 741 if (Ops.empty()) 742 return Mul; 743 744 // Otherwise, we had some input that didn't have the dupe, such as 745 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of 746 // things being added by this operation. 747 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul)); 748 749 --i; 750 e = Ops.size(); 751 continue; 752 } 753 754 // Check for X and -X in the operand list. 755 if (!BinaryOperator::isNeg(TheOp)) 756 continue; 757 758 Value *X = BinaryOperator::getNegArgument(TheOp); 759 unsigned FoundX = FindInOperandList(Ops, i, X); 760 if (FoundX == i) 761 continue; 762 763 // Remove X and -X from the operand list. 764 if (Ops.size() == 2) 765 return Constant::getNullValue(X->getType()); 766 767 Ops.erase(Ops.begin()+i); 768 if (i < FoundX) 769 --FoundX; 770 else 771 --i; // Need to back up an extra one. 772 Ops.erase(Ops.begin()+FoundX); 773 ++NumAnnihil; 774 --i; // Revisit element. 775 e -= 2; // Removed two elements. 776 } 777 778 // Scan the operand list, checking to see if there are any common factors 779 // between operands. Consider something like A*A+A*B*C+D. We would like to 780 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies. 781 // To efficiently find this, we count the number of times a factor occurs 782 // for any ADD operands that are MULs. 783 DenseMap<Value*, unsigned> FactorOccurrences; 784 785 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4) 786 // where they are actually the same multiply. 787 unsigned MaxOcc = 0; 788 Value *MaxOccVal = 0; 789 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 790 BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op); 791 if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) 792 continue; 793 794 // Compute all of the factors of this added value. 795 SmallVector<Value*, 8> Factors; 796 FindSingleUseMultiplyFactors(BOp, Factors, Ops, true); 797 assert(Factors.size() > 1 && "Bad linearize!"); 798 799 // Add one to FactorOccurrences for each unique factor in this op. 800 SmallPtrSet<Value*, 8> Duplicates; 801 for (unsigned i = 0, e = Factors.size(); i != e; ++i) { 802 Value *Factor = Factors[i]; 803 if (!Duplicates.insert(Factor)) continue; 804 805 unsigned Occ = ++FactorOccurrences[Factor]; 806 if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } 807 808 // If Factor is a negative constant, add the negated value as a factor 809 // because we can percolate the negate out. Watch for minint, which 810 // cannot be positivified. 811 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) 812 if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) { 813 Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); 814 assert(!Duplicates.count(Factor) && 815 "Shouldn't have two constant factors, missed a canonicalize"); 816 817 unsigned Occ = ++FactorOccurrences[Factor]; 818 if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } 819 } 820 } 821 } 822 823 // If any factor occurred more than one time, we can pull it out. 824 if (MaxOcc > 1) { 825 DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n'); 826 ++NumFactor; 827 828 // Create a new instruction that uses the MaxOccVal twice. If we don't do 829 // this, we could otherwise run into situations where removing a factor 830 // from an expression will drop a use of maxocc, and this can cause 831 // RemoveFactorFromExpression on successive values to behave differently. 832 Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal); 833 SmallVector<Value*, 4> NewMulOps; 834 for (unsigned i = 0; i != Ops.size(); ++i) { 835 // Only try to remove factors from expressions we're allowed to. 836 BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op); 837 if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) 838 continue; 839 840 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) { 841 // The factorized operand may occur several times. Convert them all in 842 // one fell swoop. 843 for (unsigned j = Ops.size(); j != i;) { 844 --j; 845 if (Ops[j].Op == Ops[i].Op) { 846 NewMulOps.push_back(V); 847 Ops.erase(Ops.begin()+j); 848 } 849 } 850 --i; 851 } 852 } 853 854 // No need for extra uses anymore. 855 delete DummyInst; 856 857 unsigned NumAddedValues = NewMulOps.size(); 858 Value *V = EmitAddTreeOfValues(I, NewMulOps); 859 860 // Now that we have inserted the add tree, optimize it. This allows us to 861 // handle cases that require multiple factoring steps, such as this: 862 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C)) 863 assert(NumAddedValues > 1 && "Each occurrence should contribute a value"); 864 (void)NumAddedValues; 865 V = ReassociateExpression(cast<BinaryOperator>(V)); 866 867 // Create the multiply. 868 Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I); 869 870 // Rerun associate on the multiply in case the inner expression turned into 871 // a multiply. We want to make sure that we keep things in canonical form. 872 V2 = ReassociateExpression(cast<BinaryOperator>(V2)); 873 874 // If every add operand included the factor (e.g. "A*B + A*C"), then the 875 // entire result expression is just the multiply "A*(B+C)". 876 if (Ops.empty()) 877 return V2; 878 879 // Otherwise, we had some input that didn't have the factor, such as 880 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of 881 // things being added by this operation. 882 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2)); 883 } 884 885 return 0; 886} 887 888Value *Reassociate::OptimizeExpression(BinaryOperator *I, 889 SmallVectorImpl<ValueEntry> &Ops) { 890 // Now that we have the linearized expression tree, try to optimize it. 891 // Start by folding any constants that we found. 892 bool IterateOptimization = false; 893 if (Ops.size() == 1) return Ops[0].Op; 894 895 unsigned Opcode = I->getOpcode(); 896 897 if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op)) 898 if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) { 899 Ops.pop_back(); 900 Ops.back().Op = ConstantExpr::get(Opcode, V1, V2); 901 return OptimizeExpression(I, Ops); 902 } 903 904 // Check for destructive annihilation due to a constant being used. 905 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op)) 906 switch (Opcode) { 907 default: break; 908 case Instruction::And: 909 if (CstVal->isZero()) // X & 0 -> 0 910 return CstVal; 911 if (CstVal->isAllOnesValue()) // X & -1 -> X 912 Ops.pop_back(); 913 break; 914 case Instruction::Mul: 915 if (CstVal->isZero()) { // X * 0 -> 0 916 ++NumAnnihil; 917 return CstVal; 918 } 919 920 if (cast<ConstantInt>(CstVal)->isOne()) 921 Ops.pop_back(); // X * 1 -> X 922 break; 923 case Instruction::Or: 924 if (CstVal->isAllOnesValue()) // X | -1 -> -1 925 return CstVal; 926 // FALLTHROUGH! 927 case Instruction::Add: 928 case Instruction::Xor: 929 if (CstVal->isZero()) // X [|^+] 0 -> X 930 Ops.pop_back(); 931 break; 932 } 933 if (Ops.size() == 1) return Ops[0].Op; 934 935 // Handle destructive annihilation due to identities between elements in the 936 // argument list here. 937 switch (Opcode) { 938 default: break; 939 case Instruction::And: 940 case Instruction::Or: 941 case Instruction::Xor: { 942 unsigned NumOps = Ops.size(); 943 if (Value *Result = OptimizeAndOrXor(Opcode, Ops)) 944 return Result; 945 IterateOptimization |= Ops.size() != NumOps; 946 break; 947 } 948 949 case Instruction::Add: { 950 unsigned NumOps = Ops.size(); 951 if (Value *Result = OptimizeAdd(I, Ops)) 952 return Result; 953 IterateOptimization |= Ops.size() != NumOps; 954 } 955 956 break; 957 //case Instruction::Mul: 958 } 959 960 if (IterateOptimization) 961 return OptimizeExpression(I, Ops); 962 return 0; 963} 964 965 966/// ReassociateInst - Inspect and reassociate the instruction at the 967/// given position, post-incrementing the position. 968void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) { 969 Instruction *BI = BBI++; 970 if (BI->getOpcode() == Instruction::Shl && 971 isa<ConstantInt>(BI->getOperand(1))) 972 if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { 973 MadeChange = true; 974 BI = NI; 975 } 976 977 // Reject cases where it is pointless to do this. 978 if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || 979 BI->getType()->isVectorTy()) 980 return; // Floating point ops are not associative. 981 982 // Do not reassociate boolean (i1) expressions. We want to preserve the 983 // original order of evaluation for short-circuited comparisons that 984 // SimplifyCFG has folded to AND/OR expressions. If the expression 985 // is not further optimized, it is likely to be transformed back to a 986 // short-circuited form for code gen, and the source order may have been 987 // optimized for the most likely conditions. 988 if (BI->getType()->isIntegerTy(1)) 989 return; 990 991 // If this is a subtract instruction which is not already in negate form, 992 // see if we can convert it to X+-Y. 993 if (BI->getOpcode() == Instruction::Sub) { 994 if (ShouldBreakUpSubtract(BI)) { 995 BI = BreakUpSubtract(BI, ValueRankMap); 996 // Reset the BBI iterator in case BreakUpSubtract changed the 997 // instruction it points to. 998 BBI = BI; 999 ++BBI; 1000 MadeChange = true; 1001 } else if (BinaryOperator::isNeg(BI)) { 1002 // Otherwise, this is a negation. See if the operand is a multiply tree 1003 // and if this is not an inner node of a multiply tree. 1004 if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && 1005 (!BI->hasOneUse() || 1006 !isReassociableOp(BI->use_back(), Instruction::Mul))) { 1007 BI = LowerNegateToMultiply(BI, ValueRankMap); 1008 MadeChange = true; 1009 } 1010 } 1011 } 1012 1013 // If this instruction is a commutative binary operator, process it. 1014 if (!BI->isAssociative()) return; 1015 BinaryOperator *I = cast<BinaryOperator>(BI); 1016 1017 // If this is an interior node of a reassociable tree, ignore it until we 1018 // get to the root of the tree, to avoid N^2 analysis. 1019 if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) 1020 return; 1021 1022 // If this is an add tree that is used by a sub instruction, ignore it 1023 // until we process the subtract. 1024 if (I->hasOneUse() && I->getOpcode() == Instruction::Add && 1025 cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) 1026 return; 1027 1028 ReassociateExpression(I); 1029} 1030 1031Value *Reassociate::ReassociateExpression(BinaryOperator *I) { 1032 1033 // First, walk the expression tree, linearizing the tree, collecting the 1034 // operand information. 1035 SmallVector<ValueEntry, 8> Ops; 1036 LinearizeExprTree(I, Ops); 1037 1038 DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n'); 1039 1040 // Now that we have linearized the tree to a list and have gathered all of 1041 // the operands and their ranks, sort the operands by their rank. Use a 1042 // stable_sort so that values with equal ranks will have their relative 1043 // positions maintained (and so the compiler is deterministic). Note that 1044 // this sorts so that the highest ranking values end up at the beginning of 1045 // the vector. 1046 std::stable_sort(Ops.begin(), Ops.end()); 1047 1048 // OptimizeExpression - Now that we have the expression tree in a convenient 1049 // sorted form, optimize it globally if possible. 1050 if (Value *V = OptimizeExpression(I, Ops)) { 1051 // This expression tree simplified to something that isn't a tree, 1052 // eliminate it. 1053 DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); 1054 I->replaceAllUsesWith(V); 1055 RemoveDeadBinaryOp(I); 1056 ++NumAnnihil; 1057 return V; 1058 } 1059 1060 // We want to sink immediates as deeply as possible except in the case where 1061 // this is a multiply tree used only by an add, and the immediate is a -1. 1062 // In this case we reassociate to put the negation on the outside so that we 1063 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y 1064 if (I->getOpcode() == Instruction::Mul && I->hasOneUse() && 1065 cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add && 1066 isa<ConstantInt>(Ops.back().Op) && 1067 cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) { 1068 ValueEntry Tmp = Ops.pop_back_val(); 1069 Ops.insert(Ops.begin(), Tmp); 1070 } 1071 1072 DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n'); 1073 1074 if (Ops.size() == 1) { 1075 // This expression tree simplified to something that isn't a tree, 1076 // eliminate it. 1077 I->replaceAllUsesWith(Ops[0].Op); 1078 RemoveDeadBinaryOp(I); 1079 return Ops[0].Op; 1080 } 1081 1082 // Now that we ordered and optimized the expressions, splat them back into 1083 // the expression tree, removing any unneeded nodes. 1084 RewriteExprTree(I, Ops); 1085 return I; 1086} 1087 1088 1089bool Reassociate::runOnFunction(Function &F) { 1090 // Recalculate the rank map for F 1091 BuildRankMap(F); 1092 1093 MadeChange = false; 1094 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) 1095 for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); ) 1096 ReassociateInst(BBI); 1097 1098 // Now that we're done, revisit any instructions which are likely to 1099 // have secondary reassociation opportunities. 1100 while (!RedoInsts.empty()) 1101 if (Value *V = RedoInsts.pop_back_val()) { 1102 BasicBlock::iterator BBI = cast<Instruction>(V); 1103 ReassociateInst(BBI); 1104 } 1105 1106 // Now that we're done, delete any instructions which are no longer used. 1107 while (!DeadInsts.empty()) 1108 if (Value *V = DeadInsts.pop_back_val()) 1109 RecursivelyDeleteTriviallyDeadInstructions(V); 1110 1111 // We are done with the rank map. 1112 RankMap.clear(); 1113 ValueRankMap.clear(); 1114 return MadeChange; 1115} 1116 1117