1//===- Reassociate.cpp - Reassociate binary expressions -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass reassociates commutative expressions in an order that is designed 11// to promote better constant propagation, GCSE, LICM, PRE, etc. 12// 13// For example: 4 + (x + 5) -> x + (4 + 5) 14// 15// In the implementation of this algorithm, constants are assigned rank = 0, 16// function arguments are rank = 1, and other values are assigned ranks 17// corresponding to the reverse post order traversal of current function 18// (starting at 2), which effectively gives values in deep loops higher rank 19// than values not in loops. 20// 21//===----------------------------------------------------------------------===// 22 23#define DEBUG_TYPE "reassociate" 24#include "llvm/Transforms/Scalar.h" 25#include "llvm/Transforms/Utils/Local.h" 26#include "llvm/Constants.h" 27#include "llvm/DerivedTypes.h" 28#include "llvm/Function.h" 29#include "llvm/Instructions.h" 30#include "llvm/IntrinsicInst.h" 31#include "llvm/Pass.h" 32#include "llvm/Assembly/Writer.h" 33#include "llvm/Support/CFG.h" 34#include "llvm/Support/Debug.h" 35#include "llvm/Support/ValueHandle.h" 36#include "llvm/Support/raw_ostream.h" 37#include "llvm/ADT/PostOrderIterator.h" 38#include "llvm/ADT/Statistic.h" 39#include "llvm/ADT/DenseMap.h" 40#include <algorithm> 41using namespace llvm; 42 43STATISTIC(NumLinear , "Number of insts linearized"); 44STATISTIC(NumChanged, "Number of insts reassociated"); 45STATISTIC(NumAnnihil, "Number of expr tree annihilated"); 46STATISTIC(NumFactor , "Number of multiplies factored"); 47 48namespace { 49 struct ValueEntry { 50 unsigned Rank; 51 Value *Op; 52 ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {} 53 }; 54 inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) { 55 return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start. 56 } 57} 58 59#ifndef NDEBUG 60/// PrintOps - Print out the expression identified in the Ops list. 61/// 62static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) { 63 Module *M = I->getParent()->getParent()->getParent(); 64 dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " " 65 << *Ops[0].Op->getType() << '\t'; 66 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 67 dbgs() << "[ "; 68 WriteAsOperand(dbgs(), Ops[i].Op, false, M); 69 dbgs() << ", #" << Ops[i].Rank << "] "; 70 } 71} 72#endif 73 74namespace { 75 class Reassociate : public FunctionPass { 76 DenseMap<BasicBlock*, unsigned> RankMap; 77 DenseMap<AssertingVH<>, unsigned> ValueRankMap; 78 SmallVector<WeakVH, 8> RedoInsts; 79 SmallVector<WeakVH, 8> DeadInsts; 80 bool MadeChange; 81 public: 82 static char ID; // Pass identification, replacement for typeid 83 Reassociate() : FunctionPass(ID) { 84 initializeReassociatePass(*PassRegistry::getPassRegistry()); 85 } 86 87 bool runOnFunction(Function &F); 88 89 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 90 AU.setPreservesCFG(); 91 } 92 private: 93 void BuildRankMap(Function &F); 94 unsigned getRank(Value *V); 95 Value *ReassociateExpression(BinaryOperator *I); 96 void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops, 97 unsigned Idx = 0); 98 Value *OptimizeExpression(BinaryOperator *I, 99 SmallVectorImpl<ValueEntry> &Ops); 100 Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); 101 void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); 102 void LinearizeExpr(BinaryOperator *I); 103 Value *RemoveFactorFromExpression(Value *V, Value *Factor); 104 void ReassociateInst(BasicBlock::iterator &BBI); 105 106 void RemoveDeadBinaryOp(Value *V); 107 }; 108} 109 110char Reassociate::ID = 0; 111INITIALIZE_PASS(Reassociate, "reassociate", 112 "Reassociate expressions", false, false) 113 114// Public interface to the Reassociate pass 115FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } 116 117void Reassociate::RemoveDeadBinaryOp(Value *V) { 118 Instruction *Op = dyn_cast<Instruction>(V); 119 if (!Op || !isa<BinaryOperator>(Op)) 120 return; 121 122 Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1); 123 124 ValueRankMap.erase(Op); 125 DeadInsts.push_back(Op); 126 RemoveDeadBinaryOp(LHS); 127 RemoveDeadBinaryOp(RHS); 128} 129 130 131static bool isUnmovableInstruction(Instruction *I) { 132 if (I->getOpcode() == Instruction::PHI || 133 I->getOpcode() == Instruction::Alloca || 134 I->getOpcode() == Instruction::Load || 135 I->getOpcode() == Instruction::Invoke || 136 (I->getOpcode() == Instruction::Call && 137 !isa<DbgInfoIntrinsic>(I)) || 138 I->getOpcode() == Instruction::UDiv || 139 I->getOpcode() == Instruction::SDiv || 140 I->getOpcode() == Instruction::FDiv || 141 I->getOpcode() == Instruction::URem || 142 I->getOpcode() == Instruction::SRem || 143 I->getOpcode() == Instruction::FRem) 144 return true; 145 return false; 146} 147 148void Reassociate::BuildRankMap(Function &F) { 149 unsigned i = 2; 150 151 // Assign distinct ranks to function arguments 152 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) 153 ValueRankMap[&*I] = ++i; 154 155 ReversePostOrderTraversal<Function*> RPOT(&F); 156 for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(), 157 E = RPOT.end(); I != E; ++I) { 158 BasicBlock *BB = *I; 159 unsigned BBRank = RankMap[BB] = ++i << 16; 160 161 // Walk the basic block, adding precomputed ranks for any instructions that 162 // we cannot move. This ensures that the ranks for these instructions are 163 // all different in the block. 164 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) 165 if (isUnmovableInstruction(I)) 166 ValueRankMap[&*I] = ++BBRank; 167 } 168} 169 170unsigned Reassociate::getRank(Value *V) { 171 Instruction *I = dyn_cast<Instruction>(V); 172 if (I == 0) { 173 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument. 174 return 0; // Otherwise it's a global or constant, rank 0. 175 } 176 177 if (unsigned Rank = ValueRankMap[I]) 178 return Rank; // Rank already known? 179 180 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that 181 // we can reassociate expressions for code motion! Since we do not recurse 182 // for PHI nodes, we cannot have infinite recursion here, because there 183 // cannot be loops in the value graph that do not go through PHI nodes. 184 unsigned Rank = 0, MaxRank = RankMap[I->getParent()]; 185 for (unsigned i = 0, e = I->getNumOperands(); 186 i != e && Rank != MaxRank; ++i) 187 Rank = std::max(Rank, getRank(I->getOperand(i))); 188 189 // If this is a not or neg instruction, do not count it for rank. This 190 // assures us that X and ~X will have the same rank. 191 if (!I->getType()->isIntegerTy() || 192 (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) 193 ++Rank; 194 195 //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " 196 // << Rank << "\n"); 197 198 return ValueRankMap[I] = Rank; 199} 200 201/// isReassociableOp - Return true if V is an instruction of the specified 202/// opcode and if it only has one use. 203static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { 204 if ((V->hasOneUse() || V->use_empty()) && isa<Instruction>(V) && 205 cast<Instruction>(V)->getOpcode() == Opcode) 206 return cast<BinaryOperator>(V); 207 return 0; 208} 209 210/// LowerNegateToMultiply - Replace 0-X with X*-1. 211/// 212static Instruction *LowerNegateToMultiply(Instruction *Neg, 213 DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { 214 Constant *Cst = Constant::getAllOnesValue(Neg->getType()); 215 216 Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); 217 ValueRankMap.erase(Neg); 218 Res->takeName(Neg); 219 Neg->replaceAllUsesWith(Res); 220 Res->setDebugLoc(Neg->getDebugLoc()); 221 Neg->eraseFromParent(); 222 return Res; 223} 224 225// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'. 226// Note that if D is also part of the expression tree that we recurse to 227// linearize it as well. Besides that case, this does not recurse into A,B, or 228// C. 229void Reassociate::LinearizeExpr(BinaryOperator *I) { 230 BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0)); 231 BinaryOperator *RHS = cast<BinaryOperator>(I->getOperand(1)); 232 assert(isReassociableOp(LHS, I->getOpcode()) && 233 isReassociableOp(RHS, I->getOpcode()) && 234 "Not an expression that needs linearization?"); 235 236 DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n'); 237 238 // Move the RHS instruction to live immediately before I, avoiding breaking 239 // dominator properties. 240 RHS->moveBefore(I); 241 242 // Move operands around to do the linearization. 243 I->setOperand(1, RHS->getOperand(0)); 244 RHS->setOperand(0, LHS); 245 I->setOperand(0, RHS); 246 247 // Conservatively clear all the optional flags, which may not hold 248 // after the reassociation. 249 I->clearSubclassOptionalData(); 250 LHS->clearSubclassOptionalData(); 251 RHS->clearSubclassOptionalData(); 252 253 ++NumLinear; 254 MadeChange = true; 255 DEBUG(dbgs() << "Linearized: " << *I << '\n'); 256 257 // If D is part of this expression tree, tail recurse. 258 if (isReassociableOp(I->getOperand(1), I->getOpcode())) 259 LinearizeExpr(I); 260} 261 262 263/// LinearizeExprTree - Given an associative binary expression tree, traverse 264/// all of the uses putting it into canonical form. This forces a left-linear 265/// form of the expression (((a+b)+c)+d), and collects information about the 266/// rank of the non-tree operands. 267/// 268/// NOTE: These intentionally destroys the expression tree operands (turning 269/// them into undef values) to reduce #uses of the values. This means that the 270/// caller MUST use something like RewriteExprTree to put the values back in. 271/// 272void Reassociate::LinearizeExprTree(BinaryOperator *I, 273 SmallVectorImpl<ValueEntry> &Ops) { 274 Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); 275 unsigned Opcode = I->getOpcode(); 276 277 // First step, linearize the expression if it is in ((A+B)+(C+D)) form. 278 BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode); 279 BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode); 280 281 // If this is a multiply expression tree and it contains internal negations, 282 // transform them into multiplies by -1 so they can be reassociated. 283 if (I->getOpcode() == Instruction::Mul) { 284 if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) { 285 LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap); 286 LHSBO = isReassociableOp(LHS, Opcode); 287 } 288 if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) { 289 RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap); 290 RHSBO = isReassociableOp(RHS, Opcode); 291 } 292 } 293 294 if (!LHSBO) { 295 if (!RHSBO) { 296 // Neither the LHS or RHS as part of the tree, thus this is a leaf. As 297 // such, just remember these operands and their rank. 298 Ops.push_back(ValueEntry(getRank(LHS), LHS)); 299 Ops.push_back(ValueEntry(getRank(RHS), RHS)); 300 301 // Clear the leaves out. 302 I->setOperand(0, UndefValue::get(I->getType())); 303 I->setOperand(1, UndefValue::get(I->getType())); 304 return; 305 } 306 307 // Turn X+(Y+Z) -> (Y+Z)+X 308 std::swap(LHSBO, RHSBO); 309 std::swap(LHS, RHS); 310 bool Success = !I->swapOperands(); 311 assert(Success && "swapOperands failed"); 312 (void)Success; 313 MadeChange = true; 314 } else if (RHSBO) { 315 // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not 316 // part of the expression tree. 317 LinearizeExpr(I); 318 LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0)); 319 RHS = I->getOperand(1); 320 RHSBO = 0; 321 } 322 323 // Okay, now we know that the LHS is a nested expression and that the RHS is 324 // not. Perform reassociation. 325 assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!"); 326 327 // Move LHS right before I to make sure that the tree expression dominates all 328 // values. 329 LHSBO->moveBefore(I); 330 331 // Linearize the expression tree on the LHS. 332 LinearizeExprTree(LHSBO, Ops); 333 334 // Remember the RHS operand and its rank. 335 Ops.push_back(ValueEntry(getRank(RHS), RHS)); 336 337 // Clear the RHS leaf out. 338 I->setOperand(1, UndefValue::get(I->getType())); 339} 340 341// RewriteExprTree - Now that the operands for this expression tree are 342// linearized and optimized, emit them in-order. This function is written to be 343// tail recursive. 344void Reassociate::RewriteExprTree(BinaryOperator *I, 345 SmallVectorImpl<ValueEntry> &Ops, 346 unsigned i) { 347 if (i+2 == Ops.size()) { 348 if (I->getOperand(0) != Ops[i].Op || 349 I->getOperand(1) != Ops[i+1].Op) { 350 Value *OldLHS = I->getOperand(0); 351 DEBUG(dbgs() << "RA: " << *I << '\n'); 352 I->setOperand(0, Ops[i].Op); 353 I->setOperand(1, Ops[i+1].Op); 354 355 // Clear all the optional flags, which may not hold after the 356 // reassociation if the expression involved more than just this operation. 357 if (Ops.size() != 2) 358 I->clearSubclassOptionalData(); 359 360 DEBUG(dbgs() << "TO: " << *I << '\n'); 361 MadeChange = true; 362 ++NumChanged; 363 364 // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3) 365 // delete the extra, now dead, nodes. 366 RemoveDeadBinaryOp(OldLHS); 367 } 368 return; 369 } 370 assert(i+2 < Ops.size() && "Ops index out of range!"); 371 372 if (I->getOperand(1) != Ops[i].Op) { 373 DEBUG(dbgs() << "RA: " << *I << '\n'); 374 I->setOperand(1, Ops[i].Op); 375 376 // Conservatively clear all the optional flags, which may not hold 377 // after the reassociation. 378 I->clearSubclassOptionalData(); 379 380 DEBUG(dbgs() << "TO: " << *I << '\n'); 381 MadeChange = true; 382 ++NumChanged; 383 } 384 385 BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0)); 386 assert(LHS->getOpcode() == I->getOpcode() && 387 "Improper expression tree!"); 388 389 // Compactify the tree instructions together with each other to guarantee 390 // that the expression tree is dominated by all of Ops. 391 LHS->moveBefore(I); 392 RewriteExprTree(LHS, Ops, i+1); 393} 394 395 396 397// NegateValue - Insert instructions before the instruction pointed to by BI, 398// that computes the negative version of the value specified. The negative 399// version of the value is returned, and BI is left pointing at the instruction 400// that should be processed next by the reassociation pass. 401// 402static Value *NegateValue(Value *V, Instruction *BI) { 403 if (Constant *C = dyn_cast<Constant>(V)) 404 return ConstantExpr::getNeg(C); 405 406 // We are trying to expose opportunity for reassociation. One of the things 407 // that we want to do to achieve this is to push a negation as deep into an 408 // expression chain as possible, to expose the add instructions. In practice, 409 // this means that we turn this: 410 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D 411 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate 412 // the constants. We assume that instcombine will clean up the mess later if 413 // we introduce tons of unnecessary negation instructions. 414 // 415 if (Instruction *I = dyn_cast<Instruction>(V)) 416 if (I->getOpcode() == Instruction::Add && I->hasOneUse()) { 417 // Push the negates through the add. 418 I->setOperand(0, NegateValue(I->getOperand(0), BI)); 419 I->setOperand(1, NegateValue(I->getOperand(1), BI)); 420 421 // We must move the add instruction here, because the neg instructions do 422 // not dominate the old add instruction in general. By moving it, we are 423 // assured that the neg instructions we just inserted dominate the 424 // instruction we are about to insert after them. 425 // 426 I->moveBefore(BI); 427 I->setName(I->getName()+".neg"); 428 return I; 429 } 430 431 // Okay, we need to materialize a negated version of V with an instruction. 432 // Scan the use lists of V to see if we have one already. 433 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 434 User *U = *UI; 435 if (!BinaryOperator::isNeg(U)) continue; 436 437 // We found one! Now we have to make sure that the definition dominates 438 // this use. We do this by moving it to the entry block (if it is a 439 // non-instruction value) or right after the definition. These negates will 440 // be zapped by reassociate later, so we don't need much finesse here. 441 BinaryOperator *TheNeg = cast<BinaryOperator>(U); 442 443 // Verify that the negate is in this function, V might be a constant expr. 444 if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) 445 continue; 446 447 BasicBlock::iterator InsertPt; 448 if (Instruction *InstInput = dyn_cast<Instruction>(V)) { 449 if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) { 450 InsertPt = II->getNormalDest()->begin(); 451 } else { 452 InsertPt = InstInput; 453 ++InsertPt; 454 } 455 while (isa<PHINode>(InsertPt)) ++InsertPt; 456 } else { 457 InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin(); 458 } 459 TheNeg->moveBefore(InsertPt); 460 return TheNeg; 461 } 462 463 // Insert a 'neg' instruction that subtracts the value from zero to get the 464 // negation. 465 return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI); 466} 467 468/// ShouldBreakUpSubtract - Return true if we should break up this subtract of 469/// X-Y into (X + -Y). 470static bool ShouldBreakUpSubtract(Instruction *Sub) { 471 // If this is a negation, we can't split it up! 472 if (BinaryOperator::isNeg(Sub)) 473 return false; 474 475 // Don't bother to break this up unless either the LHS is an associable add or 476 // subtract or if this is only used by one. 477 if (isReassociableOp(Sub->getOperand(0), Instruction::Add) || 478 isReassociableOp(Sub->getOperand(0), Instruction::Sub)) 479 return true; 480 if (isReassociableOp(Sub->getOperand(1), Instruction::Add) || 481 isReassociableOp(Sub->getOperand(1), Instruction::Sub)) 482 return true; 483 if (Sub->hasOneUse() && 484 (isReassociableOp(Sub->use_back(), Instruction::Add) || 485 isReassociableOp(Sub->use_back(), Instruction::Sub))) 486 return true; 487 488 return false; 489} 490 491/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is 492/// only used by an add, transform this into (X+(0-Y)) to promote better 493/// reassociation. 494static Instruction *BreakUpSubtract(Instruction *Sub, 495 DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { 496 // Convert a subtract into an add and a neg instruction. This allows sub 497 // instructions to be commuted with other add instructions. 498 // 499 // Calculate the negative value of Operand 1 of the sub instruction, 500 // and set it as the RHS of the add instruction we just made. 501 // 502 Value *NegVal = NegateValue(Sub->getOperand(1), Sub); 503 Instruction *New = 504 BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub); 505 New->takeName(Sub); 506 507 // Everyone now refers to the add instruction. 508 ValueRankMap.erase(Sub); 509 Sub->replaceAllUsesWith(New); 510 New->setDebugLoc(Sub->getDebugLoc()); 511 Sub->eraseFromParent(); 512 513 DEBUG(dbgs() << "Negated: " << *New << '\n'); 514 return New; 515} 516 517/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used 518/// by one, change this into a multiply by a constant to assist with further 519/// reassociation. 520static Instruction *ConvertShiftToMul(Instruction *Shl, 521 DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { 522 // If an operand of this shift is a reassociable multiply, or if the shift 523 // is used by a reassociable multiply or add, turn into a multiply. 524 if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) || 525 (Shl->hasOneUse() && 526 (isReassociableOp(Shl->use_back(), Instruction::Mul) || 527 isReassociableOp(Shl->use_back(), Instruction::Add)))) { 528 Constant *MulCst = ConstantInt::get(Shl->getType(), 1); 529 MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1))); 530 531 Instruction *Mul = 532 BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl); 533 ValueRankMap.erase(Shl); 534 Mul->takeName(Shl); 535 Shl->replaceAllUsesWith(Mul); 536 Mul->setDebugLoc(Shl->getDebugLoc()); 537 Shl->eraseFromParent(); 538 return Mul; 539 } 540 return 0; 541} 542 543// Scan backwards and forwards among values with the same rank as element i to 544// see if X exists. If X does not exist, return i. This is useful when 545// scanning for 'x' when we see '-x' because they both get the same rank. 546static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i, 547 Value *X) { 548 unsigned XRank = Ops[i].Rank; 549 unsigned e = Ops.size(); 550 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) 551 if (Ops[j].Op == X) 552 return j; 553 // Scan backwards. 554 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) 555 if (Ops[j].Op == X) 556 return j; 557 return i; 558} 559 560/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together 561/// and returning the result. Insert the tree before I. 562static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<Value*> &Ops){ 563 if (Ops.size() == 1) return Ops.back(); 564 565 Value *V1 = Ops.back(); 566 Ops.pop_back(); 567 Value *V2 = EmitAddTreeOfValues(I, Ops); 568 return BinaryOperator::CreateAdd(V2, V1, "tmp", I); 569} 570 571/// RemoveFactorFromExpression - If V is an expression tree that is a 572/// multiplication sequence, and if this sequence contains a multiply by Factor, 573/// remove Factor from the tree and return the new tree. 574Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { 575 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); 576 if (!BO) return 0; 577 578 SmallVector<ValueEntry, 8> Factors; 579 LinearizeExprTree(BO, Factors); 580 581 bool FoundFactor = false; 582 bool NeedsNegate = false; 583 for (unsigned i = 0, e = Factors.size(); i != e; ++i) { 584 if (Factors[i].Op == Factor) { 585 FoundFactor = true; 586 Factors.erase(Factors.begin()+i); 587 break; 588 } 589 590 // If this is a negative version of this factor, remove it. 591 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) 592 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op)) 593 if (FC1->getValue() == -FC2->getValue()) { 594 FoundFactor = NeedsNegate = true; 595 Factors.erase(Factors.begin()+i); 596 break; 597 } 598 } 599 600 if (!FoundFactor) { 601 // Make sure to restore the operands to the expression tree. 602 RewriteExprTree(BO, Factors); 603 return 0; 604 } 605 606 BasicBlock::iterator InsertPt = BO; ++InsertPt; 607 608 // If this was just a single multiply, remove the multiply and return the only 609 // remaining operand. 610 if (Factors.size() == 1) { 611 ValueRankMap.erase(BO); 612 DeadInsts.push_back(BO); 613 V = Factors[0].Op; 614 } else { 615 RewriteExprTree(BO, Factors); 616 V = BO; 617 } 618 619 if (NeedsNegate) 620 V = BinaryOperator::CreateNeg(V, "neg", InsertPt); 621 622 return V; 623} 624 625/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively 626/// add its operands as factors, otherwise add V to the list of factors. 627/// 628/// Ops is the top-level list of add operands we're trying to factor. 629static void FindSingleUseMultiplyFactors(Value *V, 630 SmallVectorImpl<Value*> &Factors, 631 const SmallVectorImpl<ValueEntry> &Ops, 632 bool IsRoot) { 633 BinaryOperator *BO; 634 if (!(V->hasOneUse() || V->use_empty()) || // More than one use. 635 !(BO = dyn_cast<BinaryOperator>(V)) || 636 BO->getOpcode() != Instruction::Mul) { 637 Factors.push_back(V); 638 return; 639 } 640 641 // If this value has a single use because it is another input to the add 642 // tree we're reassociating and we dropped its use, it actually has two 643 // uses and we can't factor it. 644 if (!IsRoot) { 645 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 646 if (Ops[i].Op == V) { 647 Factors.push_back(V); 648 return; 649 } 650 } 651 652 653 // Otherwise, add the LHS and RHS to the list of factors. 654 FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false); 655 FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false); 656} 657 658/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor' 659/// instruction. This optimizes based on identities. If it can be reduced to 660/// a single Value, it is returned, otherwise the Ops list is mutated as 661/// necessary. 662static Value *OptimizeAndOrXor(unsigned Opcode, 663 SmallVectorImpl<ValueEntry> &Ops) { 664 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs. 665 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1. 666 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 667 // First, check for X and ~X in the operand list. 668 assert(i < Ops.size()); 669 if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^. 670 Value *X = BinaryOperator::getNotArgument(Ops[i].Op); 671 unsigned FoundX = FindInOperandList(Ops, i, X); 672 if (FoundX != i) { 673 if (Opcode == Instruction::And) // ...&X&~X = 0 674 return Constant::getNullValue(X->getType()); 675 676 if (Opcode == Instruction::Or) // ...|X|~X = -1 677 return Constant::getAllOnesValue(X->getType()); 678 } 679 } 680 681 // Next, check for duplicate pairs of values, which we assume are next to 682 // each other, due to our sorting criteria. 683 assert(i < Ops.size()); 684 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) { 685 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 686 // Drop duplicate values for And and Or. 687 Ops.erase(Ops.begin()+i); 688 --i; --e; 689 ++NumAnnihil; 690 continue; 691 } 692 693 // Drop pairs of values for Xor. 694 assert(Opcode == Instruction::Xor); 695 if (e == 2) 696 return Constant::getNullValue(Ops[0].Op->getType()); 697 698 // Y ^ X^X -> Y 699 Ops.erase(Ops.begin()+i, Ops.begin()+i+2); 700 i -= 1; e -= 2; 701 ++NumAnnihil; 702 } 703 } 704 return 0; 705} 706 707/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This 708/// optimizes based on identities. If it can be reduced to a single Value, it 709/// is returned, otherwise the Ops list is mutated as necessary. 710Value *Reassociate::OptimizeAdd(Instruction *I, 711 SmallVectorImpl<ValueEntry> &Ops) { 712 // Scan the operand lists looking for X and -X pairs. If we find any, we 713 // can simplify the expression. X+-X == 0. While we're at it, scan for any 714 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z. 715 // 716 // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1". 717 // 718 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 719 Value *TheOp = Ops[i].Op; 720 // Check to see if we've seen this operand before. If so, we factor all 721 // instances of the operand together. Due to our sorting criteria, we know 722 // that these need to be next to each other in the vector. 723 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) { 724 // Rescan the list, remove all instances of this operand from the expr. 725 unsigned NumFound = 0; 726 do { 727 Ops.erase(Ops.begin()+i); 728 ++NumFound; 729 } while (i != Ops.size() && Ops[i].Op == TheOp); 730 731 DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n'); 732 ++NumFactor; 733 734 // Insert a new multiply. 735 Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound); 736 Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I); 737 738 // Now that we have inserted a multiply, optimize it. This allows us to 739 // handle cases that require multiple factoring steps, such as this: 740 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 741 RedoInsts.push_back(Mul); 742 743 // If every add operand was a duplicate, return the multiply. 744 if (Ops.empty()) 745 return Mul; 746 747 // Otherwise, we had some input that didn't have the dupe, such as 748 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of 749 // things being added by this operation. 750 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul)); 751 752 --i; 753 e = Ops.size(); 754 continue; 755 } 756 757 // Check for X and -X in the operand list. 758 if (!BinaryOperator::isNeg(TheOp)) 759 continue; 760 761 Value *X = BinaryOperator::getNegArgument(TheOp); 762 unsigned FoundX = FindInOperandList(Ops, i, X); 763 if (FoundX == i) 764 continue; 765 766 // Remove X and -X from the operand list. 767 if (Ops.size() == 2) 768 return Constant::getNullValue(X->getType()); 769 770 Ops.erase(Ops.begin()+i); 771 if (i < FoundX) 772 --FoundX; 773 else 774 --i; // Need to back up an extra one. 775 Ops.erase(Ops.begin()+FoundX); 776 ++NumAnnihil; 777 --i; // Revisit element. 778 e -= 2; // Removed two elements. 779 } 780 781 // Scan the operand list, checking to see if there are any common factors 782 // between operands. Consider something like A*A+A*B*C+D. We would like to 783 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies. 784 // To efficiently find this, we count the number of times a factor occurs 785 // for any ADD operands that are MULs. 786 DenseMap<Value*, unsigned> FactorOccurrences; 787 788 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4) 789 // where they are actually the same multiply. 790 unsigned MaxOcc = 0; 791 Value *MaxOccVal = 0; 792 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 793 BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op); 794 if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) 795 continue; 796 797 // Compute all of the factors of this added value. 798 SmallVector<Value*, 8> Factors; 799 FindSingleUseMultiplyFactors(BOp, Factors, Ops, true); 800 assert(Factors.size() > 1 && "Bad linearize!"); 801 802 // Add one to FactorOccurrences for each unique factor in this op. 803 SmallPtrSet<Value*, 8> Duplicates; 804 for (unsigned i = 0, e = Factors.size(); i != e; ++i) { 805 Value *Factor = Factors[i]; 806 if (!Duplicates.insert(Factor)) continue; 807 808 unsigned Occ = ++FactorOccurrences[Factor]; 809 if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } 810 811 // If Factor is a negative constant, add the negated value as a factor 812 // because we can percolate the negate out. Watch for minint, which 813 // cannot be positivified. 814 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) 815 if (CI->isNegative() && !CI->isMinValue(true)) { 816 Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); 817 assert(!Duplicates.count(Factor) && 818 "Shouldn't have two constant factors, missed a canonicalize"); 819 820 unsigned Occ = ++FactorOccurrences[Factor]; 821 if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } 822 } 823 } 824 } 825 826 // If any factor occurred more than one time, we can pull it out. 827 if (MaxOcc > 1) { 828 DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n'); 829 ++NumFactor; 830 831 // Create a new instruction that uses the MaxOccVal twice. If we don't do 832 // this, we could otherwise run into situations where removing a factor 833 // from an expression will drop a use of maxocc, and this can cause 834 // RemoveFactorFromExpression on successive values to behave differently. 835 Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal); 836 SmallVector<Value*, 4> NewMulOps; 837 for (unsigned i = 0; i != Ops.size(); ++i) { 838 // Only try to remove factors from expressions we're allowed to. 839 BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op); 840 if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) 841 continue; 842 843 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) { 844 // The factorized operand may occur several times. Convert them all in 845 // one fell swoop. 846 for (unsigned j = Ops.size(); j != i;) { 847 --j; 848 if (Ops[j].Op == Ops[i].Op) { 849 NewMulOps.push_back(V); 850 Ops.erase(Ops.begin()+j); 851 } 852 } 853 --i; 854 } 855 } 856 857 // No need for extra uses anymore. 858 delete DummyInst; 859 860 unsigned NumAddedValues = NewMulOps.size(); 861 Value *V = EmitAddTreeOfValues(I, NewMulOps); 862 863 // Now that we have inserted the add tree, optimize it. This allows us to 864 // handle cases that require multiple factoring steps, such as this: 865 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C)) 866 assert(NumAddedValues > 1 && "Each occurrence should contribute a value"); 867 (void)NumAddedValues; 868 V = ReassociateExpression(cast<BinaryOperator>(V)); 869 870 // Create the multiply. 871 Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I); 872 873 // Rerun associate on the multiply in case the inner expression turned into 874 // a multiply. We want to make sure that we keep things in canonical form. 875 V2 = ReassociateExpression(cast<BinaryOperator>(V2)); 876 877 // If every add operand included the factor (e.g. "A*B + A*C"), then the 878 // entire result expression is just the multiply "A*(B+C)". 879 if (Ops.empty()) 880 return V2; 881 882 // Otherwise, we had some input that didn't have the factor, such as 883 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of 884 // things being added by this operation. 885 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2)); 886 } 887 888 return 0; 889} 890 891Value *Reassociate::OptimizeExpression(BinaryOperator *I, 892 SmallVectorImpl<ValueEntry> &Ops) { 893 // Now that we have the linearized expression tree, try to optimize it. 894 // Start by folding any constants that we found. 895 bool IterateOptimization = false; 896 if (Ops.size() == 1) return Ops[0].Op; 897 898 unsigned Opcode = I->getOpcode(); 899 900 if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op)) 901 if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) { 902 Ops.pop_back(); 903 Ops.back().Op = ConstantExpr::get(Opcode, V1, V2); 904 return OptimizeExpression(I, Ops); 905 } 906 907 // Check for destructive annihilation due to a constant being used. 908 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op)) 909 switch (Opcode) { 910 default: break; 911 case Instruction::And: 912 if (CstVal->isZero()) // X & 0 -> 0 913 return CstVal; 914 if (CstVal->isAllOnesValue()) // X & -1 -> X 915 Ops.pop_back(); 916 break; 917 case Instruction::Mul: 918 if (CstVal->isZero()) { // X * 0 -> 0 919 ++NumAnnihil; 920 return CstVal; 921 } 922 923 if (cast<ConstantInt>(CstVal)->isOne()) 924 Ops.pop_back(); // X * 1 -> X 925 break; 926 case Instruction::Or: 927 if (CstVal->isAllOnesValue()) // X | -1 -> -1 928 return CstVal; 929 // FALLTHROUGH! 930 case Instruction::Add: 931 case Instruction::Xor: 932 if (CstVal->isZero()) // X [|^+] 0 -> X 933 Ops.pop_back(); 934 break; 935 } 936 if (Ops.size() == 1) return Ops[0].Op; 937 938 // Handle destructive annihilation due to identities between elements in the 939 // argument list here. 940 switch (Opcode) { 941 default: break; 942 case Instruction::And: 943 case Instruction::Or: 944 case Instruction::Xor: { 945 unsigned NumOps = Ops.size(); 946 if (Value *Result = OptimizeAndOrXor(Opcode, Ops)) 947 return Result; 948 IterateOptimization |= Ops.size() != NumOps; 949 break; 950 } 951 952 case Instruction::Add: { 953 unsigned NumOps = Ops.size(); 954 if (Value *Result = OptimizeAdd(I, Ops)) 955 return Result; 956 IterateOptimization |= Ops.size() != NumOps; 957 } 958 959 break; 960 //case Instruction::Mul: 961 } 962 963 if (IterateOptimization) 964 return OptimizeExpression(I, Ops); 965 return 0; 966} 967 968 969/// ReassociateInst - Inspect and reassociate the instruction at the 970/// given position, post-incrementing the position. 971void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) { 972 Instruction *BI = BBI++; 973 if (BI->getOpcode() == Instruction::Shl && 974 isa<ConstantInt>(BI->getOperand(1))) 975 if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { 976 MadeChange = true; 977 BI = NI; 978 } 979 980 // Reject cases where it is pointless to do this. 981 if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || 982 BI->getType()->isVectorTy()) 983 return; // Floating point ops are not associative. 984 985 // Do not reassociate boolean (i1) expressions. We want to preserve the 986 // original order of evaluation for short-circuited comparisons that 987 // SimplifyCFG has folded to AND/OR expressions. If the expression 988 // is not further optimized, it is likely to be transformed back to a 989 // short-circuited form for code gen, and the source order may have been 990 // optimized for the most likely conditions. 991 if (BI->getType()->isIntegerTy(1)) 992 return; 993 994 // If this is a subtract instruction which is not already in negate form, 995 // see if we can convert it to X+-Y. 996 if (BI->getOpcode() == Instruction::Sub) { 997 if (ShouldBreakUpSubtract(BI)) { 998 BI = BreakUpSubtract(BI, ValueRankMap); 999 // Reset the BBI iterator in case BreakUpSubtract changed the 1000 // instruction it points to. 1001 BBI = BI; 1002 ++BBI; 1003 MadeChange = true; 1004 } else if (BinaryOperator::isNeg(BI)) { 1005 // Otherwise, this is a negation. See if the operand is a multiply tree 1006 // and if this is not an inner node of a multiply tree. 1007 if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && 1008 (!BI->hasOneUse() || 1009 !isReassociableOp(BI->use_back(), Instruction::Mul))) { 1010 BI = LowerNegateToMultiply(BI, ValueRankMap); 1011 MadeChange = true; 1012 } 1013 } 1014 } 1015 1016 // If this instruction is a commutative binary operator, process it. 1017 if (!BI->isAssociative()) return; 1018 BinaryOperator *I = cast<BinaryOperator>(BI); 1019 1020 // If this is an interior node of a reassociable tree, ignore it until we 1021 // get to the root of the tree, to avoid N^2 analysis. 1022 if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) 1023 return; 1024 1025 // If this is an add tree that is used by a sub instruction, ignore it 1026 // until we process the subtract. 1027 if (I->hasOneUse() && I->getOpcode() == Instruction::Add && 1028 cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) 1029 return; 1030 1031 ReassociateExpression(I); 1032} 1033 1034Value *Reassociate::ReassociateExpression(BinaryOperator *I) { 1035 1036 // First, walk the expression tree, linearizing the tree, collecting the 1037 // operand information. 1038 SmallVector<ValueEntry, 8> Ops; 1039 LinearizeExprTree(I, Ops); 1040 1041 DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n'); 1042 1043 // Now that we have linearized the tree to a list and have gathered all of 1044 // the operands and their ranks, sort the operands by their rank. Use a 1045 // stable_sort so that values with equal ranks will have their relative 1046 // positions maintained (and so the compiler is deterministic). Note that 1047 // this sorts so that the highest ranking values end up at the beginning of 1048 // the vector. 1049 std::stable_sort(Ops.begin(), Ops.end()); 1050 1051 // OptimizeExpression - Now that we have the expression tree in a convenient 1052 // sorted form, optimize it globally if possible. 1053 if (Value *V = OptimizeExpression(I, Ops)) { 1054 // This expression tree simplified to something that isn't a tree, 1055 // eliminate it. 1056 DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); 1057 I->replaceAllUsesWith(V); 1058 if (Instruction *VI = dyn_cast<Instruction>(V)) 1059 VI->setDebugLoc(I->getDebugLoc()); 1060 RemoveDeadBinaryOp(I); 1061 ++NumAnnihil; 1062 return V; 1063 } 1064 1065 // We want to sink immediates as deeply as possible except in the case where 1066 // this is a multiply tree used only by an add, and the immediate is a -1. 1067 // In this case we reassociate to put the negation on the outside so that we 1068 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y 1069 if (I->getOpcode() == Instruction::Mul && I->hasOneUse() && 1070 cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add && 1071 isa<ConstantInt>(Ops.back().Op) && 1072 cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) { 1073 ValueEntry Tmp = Ops.pop_back_val(); 1074 Ops.insert(Ops.begin(), Tmp); 1075 } 1076 1077 DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n'); 1078 1079 if (Ops.size() == 1) { 1080 // This expression tree simplified to something that isn't a tree, 1081 // eliminate it. 1082 I->replaceAllUsesWith(Ops[0].Op); 1083 if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op)) 1084 OI->setDebugLoc(I->getDebugLoc()); 1085 RemoveDeadBinaryOp(I); 1086 return Ops[0].Op; 1087 } 1088 1089 // Now that we ordered and optimized the expressions, splat them back into 1090 // the expression tree, removing any unneeded nodes. 1091 RewriteExprTree(I, Ops); 1092 return I; 1093} 1094 1095 1096bool Reassociate::runOnFunction(Function &F) { 1097 // Recalculate the rank map for F 1098 BuildRankMap(F); 1099 1100 MadeChange = false; 1101 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) 1102 for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); ) 1103 ReassociateInst(BBI); 1104 1105 // Now that we're done, revisit any instructions which are likely to 1106 // have secondary reassociation opportunities. 1107 while (!RedoInsts.empty()) 1108 if (Value *V = RedoInsts.pop_back_val()) { 1109 BasicBlock::iterator BBI = cast<Instruction>(V); 1110 ReassociateInst(BBI); 1111 } 1112 1113 // Now that we're done, delete any instructions which are no longer used. 1114 while (!DeadInsts.empty()) 1115 if (Value *V = DeadInsts.pop_back_val()) 1116 RecursivelyDeleteTriviallyDeadInstructions(V); 1117 1118 // We are done with the rank map. 1119 RankMap.clear(); 1120 ValueRankMap.clear(); 1121 return MadeChange; 1122} 1123 1124