Reassociate.cpp revision fa0e6facc793d0a67e89f873e18cd35a9d7c02e0
//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass reassociates commutative expressions in an order that is designed
// to promote better constant propagation, GCSE, LICM, PRE, etc.
//
// For example: 4 + (x + 5) -> x + (4 + 5)
//
// In the implementation of this algorithm, constants are assigned rank = 0,
// function arguments are rank = 1, and other values are assigned ranks
// corresponding to the reverse post order traversal of current function
// (starting at 2), which effectively gives values in deep loops higher rank
// than values not in loops.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "reassociate"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseMap.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NumLinear , "Number of insts linearized");
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");

namespace {
  /// ValueEntry - A leaf operand of a linearized expression tree, paired with
  /// its precomputed rank.  Vectors of these are sorted with the operator<
  /// below so that the highest-rank (loop-deepest) values come first.
  struct ValueEntry {
    unsigned Rank;  // Rank as computed by Reassociate::getRank.
    Value *Op;      // The operand itself.
    ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
  };
  inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
    return LHS.Rank > RHS.Rank;   // Sort so that highest rank goes to start.
  }
}

#ifndef NDEBUG
/// PrintOps - Print out the expression identified in the Ops list.
///
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
  Module *M = I->getParent()->getParent()->getParent();
  dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
         << *Ops[0].Op->getType() << '\t';
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    dbgs() << "[ ";
    WriteAsOperand(dbgs(), Ops[i].Op, false, M);
    dbgs() << ", #" << Ops[i].Rank << "] ";
  }
}
#endif

namespace {
  /// Reassociate - Function pass that reorders commutative expressions to
  /// expose constant folding and redundancy-elimination opportunities.
  class Reassociate : public FunctionPass {
    // RankMap - Base rank for each basic block: its reverse-post-order
    // number shifted left by 16 (see BuildRankMap).
    DenseMap<BasicBlock*, unsigned> RankMap;
    // ValueRankMap - Cached rank for each value we have computed one for.
    // AssertingVH catches stale entries if a mapped value is deleted.
    DenseMap<AssertingVH<>, unsigned> ValueRankMap;
    // DeadInsts - Instructions queued for deletion during reassociation.
    // NOTE(review): presumably erased later in runOnFunction (not in view).
    SmallVector<WeakVH, 8> DeadInsts;
    bool MadeChange;  // Set whenever this run modifies the function.
  public:
    static char ID; // Pass identification, replacement for typeid
    Reassociate() : FunctionPass(ID) {
      initializeReassociatePass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F);

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();  // Reassociation never adds/removes blocks.
    }
  private:
    void BuildRankMap(Function &F);
    unsigned getRank(Value *V);
    Value *ReassociateExpression(BinaryOperator *I);
    void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops,
                         unsigned Idx = 0);
    Value *OptimizeExpression(BinaryOperator *I,
                              SmallVectorImpl<ValueEntry> &Ops);
    Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
    void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
    void LinearizeExpr(BinaryOperator *I);
    Value *RemoveFactorFromExpression(Value *V, Value *Factor);
    void ReassociateBB(BasicBlock *BB);

    void RemoveDeadBinaryOp(Value *V);
  };
}

char Reassociate::ID = 0;
INITIALIZE_PASS(Reassociate, "reassociate",
                "Reassociate expressions", false, false)
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }

/// RemoveDeadBinaryOp - If V is a binary operator, queue it on DeadInsts,
/// drop it from the rank map, and recurse into its operands.
/// NOTE(review): deadness is not checked here — callers are expected to only
/// pass values whose remaining uses are being discarded; confirm at call
/// sites.
void Reassociate::RemoveDeadBinaryOp(Value *V) {
  Instruction *Op = dyn_cast<Instruction>(V);
  if (!Op || !isa<BinaryOperator>(Op))
    return;

  // Capture the operands before erasing Op's bookkeeping so we can recurse.
  Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);

  ValueRankMap.erase(Op);
  DeadInsts.push_back(Op);
  RemoveDeadBinaryOp(LHS);
  RemoveDeadBinaryOp(RHS);
}

/// isUnmovableInstruction - Return true for instructions this pass must not
/// move: operations tied to control flow or memory (phis, allocas, loads,
/// invokes, non-debug calls) and the division/remainder family, which can
/// trap.
static bool isUnmovableInstruction(Instruction *I) {
  if (I->getOpcode() == Instruction::PHI ||
      I->getOpcode() == Instruction::Alloca ||
      I->getOpcode() == Instruction::Load ||
      I->getOpcode() == Instruction::Invoke ||
      (I->getOpcode() == Instruction::Call &&
       !isa<DbgInfoIntrinsic>(I)) ||
      I->getOpcode() == Instruction::UDiv ||
      I->getOpcode() == Instruction::SDiv ||
      I->getOpcode() == Instruction::FDiv ||
      I->getOpcode() == Instruction::URem ||
      I->getOpcode() == Instruction::SRem ||
      I->getOpcode() == Instruction::FRem)
    return true;
  return false;
}

/// BuildRankMap - Precompute ranks.  Function arguments get small distinct
/// ranks; each basic block (visited in reverse post-order, so deeper blocks
/// come later and get bigger numbers) gets a base rank of its RPO number
/// shifted left 16 bits; unmovable instructions within a block get successive
/// ranks above the block's base.
void Reassociate::BuildRankMap(Function &F) {
  unsigned i = 2;

  // Assign distinct ranks to function arguments
  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
    ValueRankMap[&*I] = ++i;

  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
         E = RPOT.end(); I != E; ++I) {
    BasicBlock *BB = *I;
    // Shift by 16 so per-instruction increments below never collide with the
    // next block's base rank.
    unsigned BBRank = RankMap[BB] = ++i << 16;

    // Walk the basic block, adding precomputed ranks for any instructions that
    // we cannot move.  This ensures that the ranks for these instructions are
    // all different in the block.
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
      if (isUnmovableInstruction(I))
        ValueRankMap[&*I] = ++BBRank;
  }
}

/// getRank - Return the rank of the specified value, computing it on demand
/// for instructions as 1 + max(rank of operands), stopping early once the
/// enclosing block's base rank is reached.  Results are cached in
/// ValueRankMap.
unsigned Reassociate::getRank(Value *V) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (I == 0) {
    if (isa<Argument>(V)) return ValueRankMap[V];  // Function argument.
    return 0;  // Otherwise it's a global or constant, rank 0.
  }

  if (unsigned Rank = ValueRankMap[I])
    return Rank;  // Rank already known?

  // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
  // we can reassociate expressions for code motion!  Since we do not recurse
  // for PHI nodes, we cannot have infinite recursion here, because there
  // cannot be loops in the value graph that do not go through PHI nodes.
  unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
  for (unsigned i = 0, e = I->getNumOperands();
       i != e && Rank != MaxRank; ++i)
    Rank = std::max(Rank, getRank(I->getOperand(i)));

  // If this is a not or neg instruction, do not count it for rank.  This
  // assures us that X and ~X will have the same rank.
  if (!I->getType()->isIntegerTy() ||
      (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
    ++Rank;

  //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
  //     << Rank << "\n");

  return ValueRankMap[I] = Rank;
}

/// isReassociableOp - Return true if V is an instruction of the specified
/// opcode and if it only has one use.
/// (use_empty is also accepted: LinearizeExprTree deliberately drops uses by
/// overwriting tree operands with undef.)
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
  if ((V->hasOneUse() || V->use_empty()) && isa<Instruction>(V) &&
      cast<Instruction>(V)->getOpcode() == Opcode)
    return cast<BinaryOperator>(V);
  return 0;
}

/// LowerNegateToMultiply - Replace 0-X with X*-1.
210/// 211static Instruction *LowerNegateToMultiply(Instruction *Neg, 212 DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { 213 Constant *Cst = Constant::getAllOnesValue(Neg->getType()); 214 215 Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); 216 ValueRankMap.erase(Neg); 217 Res->takeName(Neg); 218 Neg->replaceAllUsesWith(Res); 219 Neg->eraseFromParent(); 220 return Res; 221} 222 223// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'. 224// Note that if D is also part of the expression tree that we recurse to 225// linearize it as well. Besides that case, this does not recurse into A,B, or 226// C. 227void Reassociate::LinearizeExpr(BinaryOperator *I) { 228 BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0)); 229 BinaryOperator *RHS = cast<BinaryOperator>(I->getOperand(1)); 230 assert(isReassociableOp(LHS, I->getOpcode()) && 231 isReassociableOp(RHS, I->getOpcode()) && 232 "Not an expression that needs linearization?"); 233 234 DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n'); 235 236 // Move the RHS instruction to live immediately before I, avoiding breaking 237 // dominator properties. 238 RHS->moveBefore(I); 239 240 // Move operands around to do the linearization. 241 I->setOperand(1, RHS->getOperand(0)); 242 RHS->setOperand(0, LHS); 243 I->setOperand(0, RHS); 244 245 // Conservatively clear all the optional flags, which may not hold 246 // after the reassociation. 247 I->clearSubclassOptionalData(); 248 LHS->clearSubclassOptionalData(); 249 RHS->clearSubclassOptionalData(); 250 251 ++NumLinear; 252 MadeChange = true; 253 DEBUG(dbgs() << "Linearized: " << *I << '\n'); 254 255 // If D is part of this expression tree, tail recurse. 256 if (isReassociableOp(I->getOperand(1), I->getOpcode())) 257 LinearizeExpr(I); 258} 259 260 261/// LinearizeExprTree - Given an associative binary expression tree, traverse 262/// all of the uses putting it into canonical form. 
This forces a left-linear 263/// form of the expression (((a+b)+c)+d), and collects information about the 264/// rank of the non-tree operands. 265/// 266/// NOTE: These intentionally destroys the expression tree operands (turning 267/// them into undef values) to reduce #uses of the values. This means that the 268/// caller MUST use something like RewriteExprTree to put the values back in. 269/// 270void Reassociate::LinearizeExprTree(BinaryOperator *I, 271 SmallVectorImpl<ValueEntry> &Ops) { 272 Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); 273 unsigned Opcode = I->getOpcode(); 274 275 // First step, linearize the expression if it is in ((A+B)+(C+D)) form. 276 BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode); 277 BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode); 278 279 // If this is a multiply expression tree and it contains internal negations, 280 // transform them into multiplies by -1 so they can be reassociated. 281 if (I->getOpcode() == Instruction::Mul) { 282 if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) { 283 LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap); 284 LHSBO = isReassociableOp(LHS, Opcode); 285 } 286 if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) { 287 RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap); 288 RHSBO = isReassociableOp(RHS, Opcode); 289 } 290 } 291 292 if (!LHSBO) { 293 if (!RHSBO) { 294 // Neither the LHS or RHS as part of the tree, thus this is a leaf. As 295 // such, just remember these operands and their rank. 296 Ops.push_back(ValueEntry(getRank(LHS), LHS)); 297 Ops.push_back(ValueEntry(getRank(RHS), RHS)); 298 299 // Clear the leaves out. 
300 I->setOperand(0, UndefValue::get(I->getType())); 301 I->setOperand(1, UndefValue::get(I->getType())); 302 return; 303 } 304 305 // Turn X+(Y+Z) -> (Y+Z)+X 306 std::swap(LHSBO, RHSBO); 307 std::swap(LHS, RHS); 308 bool Success = !I->swapOperands(); 309 assert(Success && "swapOperands failed"); 310 Success = false; 311 MadeChange = true; 312 } else if (RHSBO) { 313 // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not 314 // part of the expression tree. 315 LinearizeExpr(I); 316 LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0)); 317 RHS = I->getOperand(1); 318 RHSBO = 0; 319 } 320 321 // Okay, now we know that the LHS is a nested expression and that the RHS is 322 // not. Perform reassociation. 323 assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!"); 324 325 // Move LHS right before I to make sure that the tree expression dominates all 326 // values. 327 LHSBO->moveBefore(I); 328 329 // Linearize the expression tree on the LHS. 330 LinearizeExprTree(LHSBO, Ops); 331 332 // Remember the RHS operand and its rank. 333 Ops.push_back(ValueEntry(getRank(RHS), RHS)); 334 335 // Clear the RHS leaf out. 336 I->setOperand(1, UndefValue::get(I->getType())); 337} 338 339// RewriteExprTree - Now that the operands for this expression tree are 340// linearized and optimized, emit them in-order. This function is written to be 341// tail recursive. 342void Reassociate::RewriteExprTree(BinaryOperator *I, 343 SmallVectorImpl<ValueEntry> &Ops, 344 unsigned i) { 345 if (i+2 == Ops.size()) { 346 if (I->getOperand(0) != Ops[i].Op || 347 I->getOperand(1) != Ops[i+1].Op) { 348 Value *OldLHS = I->getOperand(0); 349 DEBUG(dbgs() << "RA: " << *I << '\n'); 350 I->setOperand(0, Ops[i].Op); 351 I->setOperand(1, Ops[i+1].Op); 352 353 // Clear all the optional flags, which may not hold after the 354 // reassociation if the expression involved more than just this operation. 
      // (size()==2 means the whole expression is this one node, so the flags
      // still describe exactly what was there before.)
      if (Ops.size() != 2)
        I->clearSubclassOptionalData();

      DEBUG(dbgs() << "TO: " << *I << '\n');
      MadeChange = true;
      ++NumChanged;

      // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3)
      // delete the extra, now dead, nodes.
      RemoveDeadBinaryOp(OldLHS);
    }
    return;
  }
  assert(i+2 < Ops.size() && "Ops index out of range!");

  // Interior node: install the next operand as the RHS and recurse down the
  // left spine of the (left-linear) tree.
  if (I->getOperand(1) != Ops[i].Op) {
    DEBUG(dbgs() << "RA: " << *I << '\n');
    I->setOperand(1, Ops[i].Op);

    // Conservatively clear all the optional flags, which may not hold
    // after the reassociation.
    I->clearSubclassOptionalData();

    DEBUG(dbgs() << "TO: " << *I << '\n');
    MadeChange = true;
    ++NumChanged;
  }

  BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
  assert(LHS->getOpcode() == I->getOpcode() &&
         "Improper expression tree!");

  // Compactify the tree instructions together with each other to guarantee
  // that the expression tree is dominated by all of Ops.
  LHS->moveBefore(I);
  RewriteExprTree(LHS, Ops, i+1);
}



// NegateValue - Insert instructions before the instruction pointed to by BI,
// that computes the negative version of the value specified.  The negative
// version of the value is returned, and BI is left pointing at the instruction
// that should be processed next by the reassociation pass.
//
static Value *NegateValue(Value *V, Instruction *BI) {
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getNeg(C);

  // We are trying to expose opportunity for reassociation.  One of the things
  // that we want to do to achieve this is to push a negation as deep into an
  // expression chain as possible, to expose the add instructions.  In practice,
  // this means that we turn this:
  //   X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
  // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
  // the constants.  We assume that instcombine will clean up the mess later if
  // we introduce tons of unnecessary negation instructions.
  //
  if (Instruction *I = dyn_cast<Instruction>(V))
    if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
      // Push the negates through the add.
      I->setOperand(0, NegateValue(I->getOperand(0), BI));
      I->setOperand(1, NegateValue(I->getOperand(1), BI));

      // We must move the add instruction here, because the neg instructions do
      // not dominate the old add instruction in general.  By moving it, we are
      // assured that the neg instructions we just inserted dominate the
      // instruction we are about to insert after them.
      //
      I->moveBefore(BI);
      I->setName(I->getName()+".neg");
      return I;
    }

  // Okay, we need to materialize a negated version of V with an instruction.
  // Scan the use lists of V to see if we have one already.
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
    User *U = *UI;
    if (!BinaryOperator::isNeg(U)) continue;

    // We found one!  Now we have to make sure that the definition dominates
    // this use.  We do this by moving it to the entry block (if it is a
    // non-instruction value) or right after the definition.  These negates will
    // be zapped by reassociate later, so we don't need much finesse here.
    BinaryOperator *TheNeg = cast<BinaryOperator>(U);

    // Verify that the negate is in this function, V might be a constant expr.
    if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
      continue;

    BasicBlock::iterator InsertPt;
    if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
      if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
        // An invoke's value is only usable in its normal destination.
        InsertPt = II->getNormalDest()->begin();
      } else {
        InsertPt = InstInput;
        ++InsertPt;
      }
      // Nothing may be inserted among the block's leading PHI nodes.
      while (isa<PHINode>(InsertPt)) ++InsertPt;
    } else {
      InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
    }
    TheNeg->moveBefore(InsertPt);
    return TheNeg;
  }

  // Insert a 'neg' instruction that subtracts the value from zero to get the
  // negation.
  return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
}

/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
/// X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
  // If this is a negation, we can't split it up!
  if (BinaryOperator::isNeg(Sub))
    return false;

  // Don't bother to break this up unless either the LHS is an associable add or
  // subtract or if this is only used by one.
  if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
      isReassociableOp(Sub->getOperand(0), Instruction::Sub))
    return true;
  if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
      isReassociableOp(Sub->getOperand(1), Instruction::Sub))
    return true;
  if (Sub->hasOneUse() &&
      (isReassociableOp(Sub->use_back(), Instruction::Add) ||
       isReassociableOp(Sub->use_back(), Instruction::Sub)))
    return true;

  return false;
}

/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
static Instruction *BreakUpSubtract(Instruction *Sub,
                         DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
  // Convert a subtract into an add and a neg instruction.  This allows sub
  // instructions to be commuted with other add instructions.
  //
  // Calculate the negative value of Operand 1 of the sub instruction,
  // and set it as the RHS of the add instruction we just made.
  //
  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
  Instruction *New =
    BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
  New->takeName(Sub);

  // Everyone now refers to the add instruction.
  ValueRankMap.erase(Sub);
  Sub->replaceAllUsesWith(New);
  Sub->eraseFromParent();

  DEBUG(dbgs() << "Negated: " << *New << '\n');
  return New;
}

/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
static Instruction *ConvertShiftToMul(Instruction *Shl,
                         DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
  // If an operand of this shift is a reassociable multiply, or if the shift
  // is used by a reassociable multiply or add, turn into a multiply.
  if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
      (Shl->hasOneUse() &&
       (isReassociableOp(Shl->use_back(), Instruction::Mul) ||
        isReassociableOp(Shl->use_back(), Instruction::Add)))) {
    // X << C == X * (1 << C); fold the constant at compile time.
    Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
    MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));

    Instruction *Mul =
      BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
    ValueRankMap.erase(Shl);
    Mul->takeName(Shl);
    Shl->replaceAllUsesWith(Mul);
    Shl->eraseFromParent();
    return Mul;
  }
  return 0;
}

// Scan backwards and forwards among values with the same rank as element i to
// see if X exists.  If X does not exist, return i.  This is useful when
// scanning for 'x' when we see '-x' because they both get the same rank.
static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
                                  Value *X) {
  // Ops is sorted by rank, so all candidates for X are contiguous around i.
  unsigned XRank = Ops[i].Rank;
  unsigned e = Ops.size();
  for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j)
    if (Ops[j].Op == X)
      return j;
  // Scan backwards.  (j wraps to ~0U when it passes zero, ending the loop.)
  for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j)
    if (Ops[j].Op == X)
      return j;
  return i;
}

/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
/// and returning the result.  Insert the tree before I.
static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<Value*> &Ops){
  if (Ops.size() == 1) return Ops.back();

  // Pop one operand and recursively sum the rest, then add the two.
  Value *V1 = Ops.back();
  Ops.pop_back();
  Value *V2 = EmitAddTreeOfValues(I, Ops);
  return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
}

/// RemoveFactorFromExpression - If V is an expression tree that is a
/// multiplication sequence, and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
  if (!BO) return 0;

  SmallVector<ValueEntry, 8> Factors;
  LinearizeExprTree(BO, Factors);

  bool FoundFactor = false;
  bool NeedsNegate = false;
  for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
    if (Factors[i].Op == Factor) {
      FoundFactor = true;
      Factors.erase(Factors.begin()+i);
      break;
    }

    // If this is a negative version of this factor, remove it.
    if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
      if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
        if (FC1->getValue() == -FC2->getValue()) {
          FoundFactor = NeedsNegate = true;
          Factors.erase(Factors.begin()+i);
          break;
        }
  }

  if (!FoundFactor) {
    // Make sure to restore the operands to the expression tree.
    // (LinearizeExprTree replaced them with undef, so this is mandatory.)
    RewriteExprTree(BO, Factors);
    return 0;
  }

  BasicBlock::iterator InsertPt = BO; ++InsertPt;

  // If this was just a single multiply, remove the multiply and return the only
  // remaining operand.
  if (Factors.size() == 1) {
    ValueRankMap.erase(BO);
    DeadInsts.push_back(BO);
    V = Factors[0].Op;
  } else {
    RewriteExprTree(BO, Factors);
    V = BO;
  }

  if (NeedsNegate)
    V = BinaryOperator::CreateNeg(V, "neg", InsertPt);

  return V;
}

/// FindSingleUseMultiplyFactors - If V is a single use or single use with a
/// hanging chain of single-use multiplies, recursively add its operands as
/// factors, otherwise add V to the list of factors.
///
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
                                         SmallVectorImpl<Value*> &Factors,
                                         const SmallVectorImpl<ValueEntry> &Ops,
                                         bool IsRoot) {
  BinaryOperator *BO;
  if (!(V->hasOneUse() || V->use_empty()) ||  // More than one use.
      !(BO = dyn_cast<BinaryOperator>(V)) ||
      BO->getOpcode() != Instruction::Mul) {
    Factors.push_back(V);
    return;
  }

  // If this value has a single use because it is another input to the add
  // tree we're reassociating and we dropped its use, it actually has two
  // uses and we can't factor it.
  if (!IsRoot) {
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (Ops[i].Op == V) {
        Factors.push_back(V);
        return;
      }
  }


  // Otherwise, add the LHS and RHS to the list of factors.
  FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false);
  FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false);
}

/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
/// instruction.  This optimizes based on identities.  If it can be reduced to
/// a single Value, it is returned, otherwise the Ops list is mutated as
/// necessary.
static Value *OptimizeAndOrXor(unsigned Opcode,
                               SmallVectorImpl<ValueEntry> &Ops) {
  // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
  // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    // First, check for X and ~X in the operand list.
    assert(i < Ops.size());
    if (BinaryOperator::isNot(Ops[i].Op)) {    // Cannot occur for ^.
      Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
      unsigned FoundX = FindInOperandList(Ops, i, X);
      if (FoundX != i) {
        if (Opcode == Instruction::And)   // ...&X&~X = 0
          return Constant::getNullValue(X->getType());

        if (Opcode == Instruction::Or)    // ...|X|~X = -1
          return Constant::getAllOnesValue(X->getType());
      }
    }

    // Next, check for duplicate pairs of values, which we assume are next to
    // each other, due to our sorting criteria.
    assert(i < Ops.size());
    if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
      if (Opcode == Instruction::And || Opcode == Instruction::Or) {
        // Drop duplicate values for And and Or.
        Ops.erase(Ops.begin()+i);
        --i; --e;  // Revisit this index after the erase.
        ++NumAnnihil;
        continue;
      }

      // Drop pairs of values for Xor.
      assert(Opcode == Instruction::Xor);
      if (e == 2)
        return Constant::getNullValue(Ops[0].Op->getType());

      // Y ^ X^X -> Y
      Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
      i -= 1; e -= 2;
      ++NumAnnihil;
    }
  }
  return 0;
}

/// OptimizeAdd - Optimize a series of operands to an 'add' instruction.  This
/// optimizes based on identities.  If it can be reduced to a single Value, it
/// is returned, otherwise the Ops list is mutated as necessary.
Value *Reassociate::OptimizeAdd(Instruction *I,
                                SmallVectorImpl<ValueEntry> &Ops) {
  // Scan the operand lists looking for X and -X pairs.  If we find any, we
  // can simplify the expression. X+-X == 0.  While we're at it, scan for any
  // duplicates.  We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
  //
  // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1".
  //
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    Value *TheOp = Ops[i].Op;
    // Check to see if we've seen this operand before.  If so, we factor all
    // instances of the operand together.  Due to our sorting criteria, we know
    // that these need to be next to each other in the vector.
    if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
      // Rescan the list, remove all instances of this operand from the expr.
      unsigned NumFound = 0;
      do {
        Ops.erase(Ops.begin()+i);
        ++NumFound;
      } while (i != Ops.size() && Ops[i].Op == TheOp);

      DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
      ++NumFactor;

      // Insert a new multiply.
      Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound);
      Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);

      // Now that we have inserted a multiply, optimize it. This allows us to
      // handle cases that require multiple factoring steps, such as this:
      // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
      Mul = ReassociateExpression(cast<BinaryOperator>(Mul));

      // If every add operand was a duplicate, return the multiply.
      if (Ops.empty())
        return Mul;

      // Otherwise, we had some input that didn't have the dupe, such as
      // "A + A + B" -> "A*2 + B".  Add the new multiply to the list of
      // things being added by this operation.
      Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));

      // Compensate for the insert at the front; revisit from position i.
      --i;
      e = Ops.size();
      continue;
    }

    // Check for X and -X in the operand list.
    if (!BinaryOperator::isNeg(TheOp))
      continue;

    Value *X = BinaryOperator::getNegArgument(TheOp);
    unsigned FoundX = FindInOperandList(Ops, i, X);
    if (FoundX == i)
      continue;

    // Remove X and -X from the operand list.
    if (Ops.size() == 2)
      return Constant::getNullValue(X->getType());

    Ops.erase(Ops.begin()+i);
    // Keep FoundX pointing at X after the first erase shifted elements.
    if (i < FoundX)
      --FoundX;
    else
      --i;   // Need to back up an extra one.
    Ops.erase(Ops.begin()+FoundX);
    ++NumAnnihil;
    --i;     // Revisit element.
    e -= 2;  // Removed two elements.
  }

  // Scan the operand list, checking to see if there are any common factors
  // between operands.  Consider something like A*A+A*B*C+D.  We would like to
  // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
  // To efficiently find this, we count the number of times a factor occurs
  // for any ADD operands that are MULs.
  DenseMap<Value*, unsigned> FactorOccurrences;

  // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
  // where they are actually the same multiply.
  unsigned MaxOcc = 0;
  Value *MaxOccVal = 0;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    // use_empty() identifies multiplies that are part of this expression tree
    // (LinearizeExprTree dropped their uses).
    BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
    if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
      continue;

    // Compute all of the factors of this added value.
    SmallVector<Value*, 8> Factors;
    FindSingleUseMultiplyFactors(BOp, Factors, Ops, true);
    assert(Factors.size() > 1 && "Bad linearize!");

    // Add one to FactorOccurrences for each unique factor in this op.
    SmallPtrSet<Value*, 8> Duplicates;
    for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
      Value *Factor = Factors[i];
      if (!Duplicates.insert(Factor)) continue;

      unsigned Occ = ++FactorOccurrences[Factor];
      if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }

      // If Factor is a negative constant, add the negated value as a factor
      // because we can percolate the negate out.  Watch for minint, which
      // cannot be positivified.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
        if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) {
          Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
          assert(!Duplicates.count(Factor) &&
                 "Shouldn't have two constant factors, missed a canonicalize");

          unsigned Occ = ++FactorOccurrences[Factor];
          if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
        }
    }
  }

  // If any factor occurred more than one time, we can pull it out.
  if (MaxOcc > 1) {
    DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
    ++NumFactor;

    // Create a new instruction that uses the MaxOccVal twice.  If we don't do
    // this, we could otherwise run into situations where removing a factor
    // from an expression will drop a use of maxocc, and this can cause
    // RemoveFactorFromExpression on successive values to behave differently.
    Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
    SmallVector<Value*, 4> NewMulOps;
    for (unsigned i = 0; i != Ops.size(); ++i) {
      // Only try to remove factors from expressions we're allowed to.
      BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
      if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
        continue;

      if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
        // The factorized operand may occur several times.  Convert them all in
        // one fell swoop.
        for (unsigned j = Ops.size(); j != i;) {
          --j;
          if (Ops[j].Op == Ops[i].Op) {
            NewMulOps.push_back(V);
            Ops.erase(Ops.begin()+j);
          }
        }
        --i;
      }
    }

    // No need for extra uses anymore.
    delete DummyInst;

    unsigned NumAddedValues = NewMulOps.size();
    Value *V = EmitAddTreeOfValues(I, NewMulOps);

    // Now that we have inserted the add tree, optimize it.  This allows us to
    // handle cases that require multiple factoring steps, such as this:
    // A*A*B + A*A*C   -->   A*(A*B+A*C)   -->   A*(A*(B+C))
    assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
    (void)NumAddedValues;
    V = ReassociateExpression(cast<BinaryOperator>(V));

    // Create the multiply.
    Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);

    // Rerun associate on the multiply in case the inner expression turned into
    // a multiply.  We want to make sure that we keep things in canonical form.
    V2 = ReassociateExpression(cast<BinaryOperator>(V2));

    // If every add operand included the factor (e.g. "A*B + A*C"), then the
    // entire result expression is just the multiply "A*(B+C)".
    if (Ops.empty())
      return V2;

    // Otherwise, we had some input that didn't have the factor, such as
    // "A*B + A*C + D" -> "A*(B+C) + D".  Add the new multiply to the list of
    // things being added by this operation.
    Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
  }

  return 0;
}

/// OptimizeExpression - Given a linearized operand list, fold constants and
/// apply identity simplifications, recursing until no more progress is made.
/// Returns a single replacement Value, or null if Ops (possibly mutated)
/// should be rewritten back into the tree.
Value *Reassociate::OptimizeExpression(BinaryOperator *I,
                                       SmallVectorImpl<ValueEntry> &Ops) {
  // Now that we have the linearized expression tree, try to optimize it.
  // Start by folding any constants that we found.
  bool IterateOptimization = false;
  if (Ops.size() == 1) return Ops[0].Op;

  unsigned Opcode = I->getOpcode();

  // Constants sort to the end (rank 0), so any two foldable constants are the
  // last two entries.
  if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
    if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
      Ops.pop_back();
      Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
      return OptimizeExpression(I, Ops);
    }

  // Check for destructive annihilation due to a constant being used.
904 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op)) 905 switch (Opcode) { 906 default: break; 907 case Instruction::And: 908 if (CstVal->isZero()) // X & 0 -> 0 909 return CstVal; 910 if (CstVal->isAllOnesValue()) // X & -1 -> X 911 Ops.pop_back(); 912 break; 913 case Instruction::Mul: 914 if (CstVal->isZero()) { // X * 0 -> 0 915 ++NumAnnihil; 916 return CstVal; 917 } 918 919 if (cast<ConstantInt>(CstVal)->isOne()) 920 Ops.pop_back(); // X * 1 -> X 921 break; 922 case Instruction::Or: 923 if (CstVal->isAllOnesValue()) // X | -1 -> -1 924 return CstVal; 925 // FALLTHROUGH! 926 case Instruction::Add: 927 case Instruction::Xor: 928 if (CstVal->isZero()) // X [|^+] 0 -> X 929 Ops.pop_back(); 930 break; 931 } 932 if (Ops.size() == 1) return Ops[0].Op; 933 934 // Handle destructive annihilation due to identities between elements in the 935 // argument list here. 936 switch (Opcode) { 937 default: break; 938 case Instruction::And: 939 case Instruction::Or: 940 case Instruction::Xor: { 941 unsigned NumOps = Ops.size(); 942 if (Value *Result = OptimizeAndOrXor(Opcode, Ops)) 943 return Result; 944 IterateOptimization |= Ops.size() != NumOps; 945 break; 946 } 947 948 case Instruction::Add: { 949 unsigned NumOps = Ops.size(); 950 if (Value *Result = OptimizeAdd(I, Ops)) 951 return Result; 952 IterateOptimization |= Ops.size() != NumOps; 953 } 954 955 break; 956 //case Instruction::Mul: 957 } 958 959 if (IterateOptimization) 960 return OptimizeExpression(I, Ops); 961 return 0; 962} 963 964 965/// ReassociateBB - Inspect all of the instructions in this basic block, 966/// reassociating them as we go. 
void Reassociate::ReassociateBB(BasicBlock *BB) {
  for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
    // Advance the iterator before touching BI, since the transformations
    // below may replace or erase the current instruction.
    Instruction *BI = BBI++;
    // Canonicalize shl-by-constant via ConvertShiftToMul so it can
    // participate in multiply reassociation.
    if (BI->getOpcode() == Instruction::Shl &&
        isa<ConstantInt>(BI->getOperand(1)))
      if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
        MadeChange = true;
        BI = NI;
      }

    // Reject cases where it is pointless to do this.
    if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
        BI->getType()->isVectorTy())
      continue;  // Floating point ops are not associative.

    // Do not reassociate boolean (i1) expressions.  We want to preserve the
    // original order of evaluation for short-circuited comparisons that
    // SimplifyCFG has folded to AND/OR expressions.  If the expression
    // is not further optimized, it is likely to be transformed back to a
    // short-circuited form for code gen, and the source order may have been
    // optimized for the most likely conditions.
    if (BI->getType()->isIntegerTy(1))
      continue;

    // If this is a subtract instruction which is not already in negate form,
    // see if we can convert it to X+-Y.
    if (BI->getOpcode() == Instruction::Sub) {
      if (ShouldBreakUpSubtract(BI)) {
        BI = BreakUpSubtract(BI, ValueRankMap);
        // Reset the BBI iterator in case BreakUpSubtract changed the
        // instruction it points to.
        BBI = BI;
        ++BBI;
        MadeChange = true;
      } else if (BinaryOperator::isNeg(BI)) {
        // Otherwise, this is a negation.  See if the operand is a multiply tree
        // and if this is not an inner node of a multiply tree.
        if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
            (!BI->hasOneUse() ||
             !isReassociableOp(BI->use_back(), Instruction::Mul))) {
          BI = LowerNegateToMultiply(BI, ValueRankMap);
          MadeChange = true;
        }
      }
    }

    // If this instruction is a commutative binary operator, process it.
    if (!BI->isAssociative()) continue;
    BinaryOperator *I = cast<BinaryOperator>(BI);

    // If this is an interior node of a reassociable tree, ignore it until we
    // get to the root of the tree, to avoid N^2 analysis.
    if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
      continue;

    // If this is an add tree that is used by a sub instruction, ignore it
    // until we process the subtract.
    if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
        cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
      continue;

    ReassociateExpression(I);
  }
}

/// ReassociateExpression - Reassociate the expression tree rooted at I:
/// linearize it into a rank-sorted operand list, optimize the list, and
/// rewrite the tree.  Returns the value that replaces the tree (I itself if
/// the tree survives, or the simplified scalar if it collapses).
Value *Reassociate::ReassociateExpression(BinaryOperator *I) {

  // First, walk the expression tree, linearizing the tree, collecting the
  // operand information.
  SmallVector<ValueEntry, 8> Ops;
  LinearizeExprTree(I, Ops);

  DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');

  // Now that we have linearized the tree to a list and have gathered all of
  // the operands and their ranks, sort the operands by their rank.  Use a
  // stable_sort so that values with equal ranks will have their relative
  // positions maintained (and so the compiler is deterministic).  Note that
  // this sorts so that the highest ranking values end up at the beginning of
  // the vector.
  std::stable_sort(Ops.begin(), Ops.end());

  // OptimizeExpression - Now that we have the expression tree in a convenient
  // sorted form, optimize it globally if possible.
  if (Value *V = OptimizeExpression(I, Ops)) {
    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
    I->replaceAllUsesWith(V);
    RemoveDeadBinaryOp(I);
    ++NumAnnihil;
    return V;
  }

  // We want to sink immediates as deeply as possible except in the case where
  // this is a multiply tree used only by an add, and the immediate is a -1.
  // In this case we reassociate to put the negation on the outside so that we
  // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
  if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
      cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(Ops.back().Op) &&
      cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
    // Move the -1 from the low-rank end to the front of the operand list.
    ValueEntry Tmp = Ops.pop_back_val();
    Ops.insert(Ops.begin(), Tmp);
  }

  DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');

  if (Ops.size() == 1) {
    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    I->replaceAllUsesWith(Ops[0].Op);
    RemoveDeadBinaryOp(I);
    return Ops[0].Op;
  }

  // Now that we ordered and optimized the expressions, splat them back into
  // the expression tree, removing any unneeded nodes.
  RewriteExprTree(I, Ops);
  return I;
}


/// runOnFunction - Top-level pass entry point: build the rank map, walk every
/// basic block reassociating expressions, then clean up dead instructions and
/// the rank state.  Returns true if any change was made to F.
bool Reassociate::runOnFunction(Function &F) {
  // Recalculate the rank map for F
  BuildRankMap(F);

  MadeChange = false;
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
    ReassociateBB(FI);

  // Now that we're done, delete any instructions which are no longer used.
  // NOTE(review): the (Value*) cast plus cast_or_null suggests DeadInsts
  // holds weak value handles (ValueHandle.h is included) whose entries may
  // have been nulled when the instruction was already deleted — confirm
  // against the DeadInsts declaration, which is outside this excerpt.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
          cast_or_null<Instruction>((Value *)DeadInsts.pop_back_val()))
      RecursivelyDeleteTriviallyDeadInstructions(Inst);

  // We are done with the rank map.
  RankMap.clear();
  ValueRankMap.clear();
  return MadeChange;
}