Reassociate.cpp revision 0fd120b970fe9a036ae664ad1bfbf04e55b3b8a7
1//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass reassociates commutative expressions in an order that is designed
11// to promote better constant propagation, GCSE, LICM, PRE, etc.
12//
13// For example: 4 + (x + 5) -> x + (4 + 5)
14//
15// In the implementation of this algorithm, constants are assigned rank = 0,
16// function arguments are rank = 1, and other values are assigned ranks
17// corresponding to the reverse post order traversal of current function
18// (starting at 2), which effectively gives values in deep loops higher rank
19// than values not in loops.
20//
21//===----------------------------------------------------------------------===//
22
23#define DEBUG_TYPE "reassociate"
24#include "llvm/Transforms/Scalar.h"
25#include "llvm/Transforms/Utils/Local.h"
26#include "llvm/Constants.h"
27#include "llvm/DerivedTypes.h"
28#include "llvm/Function.h"
29#include "llvm/Instructions.h"
30#include "llvm/IntrinsicInst.h"
31#include "llvm/Pass.h"
32#include "llvm/Assembly/Writer.h"
33#include "llvm/Support/CFG.h"
34#include "llvm/Support/IRBuilder.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/ValueHandle.h"
37#include "llvm/Support/raw_ostream.h"
38#include "llvm/ADT/DenseMap.h"
39#include "llvm/ADT/PostOrderIterator.h"
40#include "llvm/ADT/SmallMap.h"
41#include "llvm/ADT/STLExtras.h"
42#include "llvm/ADT/Statistic.h"
43#include <algorithm>
44using namespace llvm;
45
46STATISTIC(NumChanged, "Number of insts reassociated");
47STATISTIC(NumAnnihil, "Number of expr tree annihilated");
48STATISTIC(NumFactor , "Number of multiplies factored");
49
50namespace {
51  struct ValueEntry {
52    unsigned Rank;
53    Value *Op;
54    ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
55  };
56  inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
57    return LHS.Rank > RHS.Rank;   // Sort so that highest rank goes to start.
58  }
59}
60
61#ifndef NDEBUG
62/// PrintOps - Print out the expression identified in the Ops list.
63///
64static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
65  Module *M = I->getParent()->getParent()->getParent();
66  dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
67       << *Ops[0].Op->getType() << '\t';
68  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
69    dbgs() << "[ ";
70    WriteAsOperand(dbgs(), Ops[i].Op, false, M);
71    dbgs() << ", #" << Ops[i].Rank << "] ";
72  }
73}
74#endif
75
76namespace {
77  /// \brief Utility class representing a base and exponent pair which form one
78  /// factor of some product.
79  struct Factor {
80    Value *Base;
81    unsigned Power;
82
83    Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}
84
85    /// \brief Sort factors by their Base.
86    struct BaseSorter {
87      bool operator()(const Factor &LHS, const Factor &RHS) {
88        return LHS.Base < RHS.Base;
89      }
90    };
91
92    /// \brief Compare factors for equal bases.
93    struct BaseEqual {
94      bool operator()(const Factor &LHS, const Factor &RHS) {
95        return LHS.Base == RHS.Base;
96      }
97    };
98
99    /// \brief Sort factors in descending order by their power.
100    struct PowerDescendingSorter {
101      bool operator()(const Factor &LHS, const Factor &RHS) {
102        return LHS.Power > RHS.Power;
103      }
104    };
105
106    /// \brief Compare factors for equal powers.
107    struct PowerEqual {
108      bool operator()(const Factor &LHS, const Factor &RHS) {
109        return LHS.Power == RHS.Power;
110      }
111    };
112  };
113}
114
115namespace {
116  class Reassociate : public FunctionPass {
117    DenseMap<BasicBlock*, unsigned> RankMap;
118    DenseMap<AssertingVH<Value>, unsigned> ValueRankMap;
119    SmallVector<WeakVH, 8> RedoInsts;
120    SmallVector<WeakVH, 8> DeadInsts;
121    bool MadeChange;
122  public:
123    static char ID; // Pass identification, replacement for typeid
124    Reassociate() : FunctionPass(ID) {
125      initializeReassociatePass(*PassRegistry::getPassRegistry());
126    }
127
128    bool runOnFunction(Function &F);
129
130    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
131      AU.setPreservesCFG();
132    }
133  private:
134    void BuildRankMap(Function &F);
135    unsigned getRank(Value *V);
136    Value *ReassociateExpression(BinaryOperator *I);
137    void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
138    Value *OptimizeExpression(BinaryOperator *I,
139                              SmallVectorImpl<ValueEntry> &Ops);
140    Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
141    bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
142                                SmallVectorImpl<Factor> &Factors);
143    Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder,
144                                   SmallVectorImpl<Factor> &Factors);
145    Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
146    void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
147    Value *RemoveFactorFromExpression(Value *V, Value *Factor);
148    void ReassociateInst(BasicBlock::iterator &BBI);
149
150    void RemoveDeadBinaryOp(Value *V);
151  };
152}
153
154char Reassociate::ID = 0;
155INITIALIZE_PASS(Reassociate, "reassociate",
156                "Reassociate expressions", false, false)
157
158// Public interface to the Reassociate pass
159FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
160
161/// isReassociableOp - Return true if V is an instruction of the specified
162/// opcode and if it only has one use.
163static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
164  if (V->hasOneUse() && isa<Instruction>(V) &&
165      cast<Instruction>(V)->getOpcode() == Opcode)
166    return cast<BinaryOperator>(V);
167  return 0;
168}
169
170void Reassociate::RemoveDeadBinaryOp(Value *V) {
171  BinaryOperator *Op = dyn_cast<BinaryOperator>(V);
172  if (!Op)
173    return;
174
175  ValueRankMap.erase(Op);
176  DeadInsts.push_back(Op);
177
178  BinaryOperator *LHS = isReassociableOp(Op->getOperand(0), Op->getOpcode());
179  BinaryOperator *RHS = isReassociableOp(Op->getOperand(1), Op->getOpcode());
180  Op->setOperand(0, UndefValue::get(Op->getType()));
181  Op->setOperand(1, UndefValue::get(Op->getType()));
182
183  if (LHS)
184    RemoveDeadBinaryOp(LHS);
185  if (RHS)
186    RemoveDeadBinaryOp(RHS);
187}
188
189static bool isUnmovableInstruction(Instruction *I) {
190  if (I->getOpcode() == Instruction::PHI ||
191      I->getOpcode() == Instruction::LandingPad ||
192      I->getOpcode() == Instruction::Alloca ||
193      I->getOpcode() == Instruction::Load ||
194      I->getOpcode() == Instruction::Invoke ||
195      (I->getOpcode() == Instruction::Call &&
196       !isa<DbgInfoIntrinsic>(I)) ||
197      I->getOpcode() == Instruction::UDiv ||
198      I->getOpcode() == Instruction::SDiv ||
199      I->getOpcode() == Instruction::FDiv ||
200      I->getOpcode() == Instruction::URem ||
201      I->getOpcode() == Instruction::SRem ||
202      I->getOpcode() == Instruction::FRem)
203    return true;
204  return false;
205}
206
207void Reassociate::BuildRankMap(Function &F) {
208  unsigned i = 2;
209
210  // Assign distinct ranks to function arguments
211  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
212    ValueRankMap[&*I] = ++i;
213
214  ReversePostOrderTraversal<Function*> RPOT(&F);
215  for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
216         E = RPOT.end(); I != E; ++I) {
217    BasicBlock *BB = *I;
218    unsigned BBRank = RankMap[BB] = ++i << 16;
219
220    // Walk the basic block, adding precomputed ranks for any instructions that
221    // we cannot move.  This ensures that the ranks for these instructions are
222    // all different in the block.
223    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
224      if (isUnmovableInstruction(I))
225        ValueRankMap[&*I] = ++BBRank;
226  }
227}
228
229unsigned Reassociate::getRank(Value *V) {
230  Instruction *I = dyn_cast<Instruction>(V);
231  if (I == 0) {
232    if (isa<Argument>(V)) return ValueRankMap[V];   // Function argument.
233    return 0;  // Otherwise it's a global or constant, rank 0.
234  }
235
236  if (unsigned Rank = ValueRankMap[I])
237    return Rank;    // Rank already known?
238
239  // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
240  // we can reassociate expressions for code motion!  Since we do not recurse
241  // for PHI nodes, we cannot have infinite recursion here, because there
242  // cannot be loops in the value graph that do not go through PHI nodes.
243  unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
244  for (unsigned i = 0, e = I->getNumOperands();
245       i != e && Rank != MaxRank; ++i)
246    Rank = std::max(Rank, getRank(I->getOperand(i)));
247
248  // If this is a not or neg instruction, do not count it for rank.  This
249  // assures us that X and ~X will have the same rank.
250  if (!I->getType()->isIntegerTy() ||
251      (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
252    ++Rank;
253
254  //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
255  //     << Rank << "\n");
256
257  return ValueRankMap[I] = Rank;
258}
259
260/// LowerNegateToMultiply - Replace 0-X with X*-1.
261///
262static BinaryOperator *LowerNegateToMultiply(Instruction *Neg,
263                         DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
264  Constant *Cst = Constant::getAllOnesValue(Neg->getType());
265
266  BinaryOperator *Res =
267    BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
268  ValueRankMap.erase(Neg);
269  Res->takeName(Neg);
270  Neg->replaceAllUsesWith(Res);
271  Res->setDebugLoc(Neg->getDebugLoc());
272  Neg->eraseFromParent();
273  return Res;
274}
275
276/// LinearizeExprTree - Given an associative binary expression, return the leaf
277/// nodes in Ops.  The original expression is the same as Ops[0] op ... Ops[N].
278/// Note that a node may occur multiple times in Ops, but if so all occurrences
279/// are consecutive in the vector.
280///
281/// A leaf node is either not a binary operation of the same kind as the root
282/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
283/// opcode), or is the same kind of binary operator but has a use which either
284/// does not belong to the expression, or does belong to the expression but is
285/// a leaf node.  Every leaf node has at least one use that is a non-leaf node
286/// of the expression, while for non-leaf nodes (except for the root 'I') every
287/// use is a non-leaf node of the expression.
288///
289/// For example:
290///           expression graph        node names
291///
292///                     +        |        I
293///                    / \       |
294///                   +   +      |      A,  B
295///                  / \ / \     |
296///                 *   +   *    |    C,  D,  E
297///                / \ / \ / \   |
298///                   +   *      |      F,  G
299///
300/// The leaf nodes are C, E, F and G.  The Ops array will contain (maybe not in
301/// that order) C, E, F, F, G, G.
302///
303/// The expression is maximal: if some instruction is a binary operator of the
304/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
305/// then the instruction also belongs to the expression, is not a leaf node of
306/// it, and its operands also belong to the expression (but may be leaf nodes).
307///
308/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
309/// order to ensure that every non-root node in the expression has *exactly one*
310/// use by a non-leaf node of the expression.  This destruction means that the
311/// caller MUST use something like RewriteExprTree to put the values back in.
312///
313/// In the above example either the right operand of A or the left operand of B
314/// will be replaced by undef.  If it is B's operand then this gives:
315///
316///                     +        |        I
317///                    / \       |
318///                   +   +      |      A,  B - operand of B replaced with undef
319///                  / \   \     |
320///                 *   +   *    |    C,  D,  E
321///                / \ / \ / \   |
322///                   +   *      |      F,  G
323///
324/// Note that if you visit operands recursively starting from a leaf node then
325/// you will never encounter such an undef operand unless you get back to 'I',
326/// which requires passing through a phi node.
327///
328/// Note that this routine may also mutate binary operators of the wrong type
329/// that have all uses inside the expression (i.e. only used by non-leaf nodes
330/// of the expression) if it can turn them into binary operators of the right
331/// type and thus make the expression bigger.
332
333void Reassociate::LinearizeExprTree(BinaryOperator *I,
334                                    SmallVectorImpl<ValueEntry> &Ops) {
335  DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
336
337  // Visit all operands of the expression, keeping track of their weight (the
338  // number of paths from the expression root to the operand, or if you like
339  // the number of times that operand occurs in the linearized expression).
340  // For example, if I = X + A, where X = A + B, then I, X and B have weight 1
341  // while A has weight two.
342
343  // Worklist of non-leaf nodes (their operands are in the expression too) along
344  // with their weights, representing a certain number of paths to the operator.
345  // If an operator occurs in the worklist multiple times then we found multiple
346  // ways to get to it.
347  SmallVector<std::pair<BinaryOperator*, unsigned>, 8> Worklist; // (Op, Weight)
348  Worklist.push_back(std::make_pair(I, 1));
349  unsigned Opcode = I->getOpcode();
350
351  // Leaves of the expression are values that either aren't the right kind of
352  // operation (eg: a constant, or a multiply in an add tree), or are, but have
353  // some uses that are not inside the expression.  For example, in I = X + X,
354  // X = A + B, the value X has two uses (by I) that are in the expression.  If
355  // X has any other uses, for example in a return instruction, then we consider
356  // X to be a leaf, and won't analyze it further.  When we first visit a value,
357  // if it has more than one use then at first we conservatively consider it to
358  // be a leaf.  Later, as the expression is explored, we may discover some more
359  // uses of the value from inside the expression.  If all uses turn out to be
360  // from within the expression (and the value is a binary operator of the right
361  // kind) then the value is no longer considered to be a leaf, and its operands
362  // are explored.
363
364  // Leaves - Keeps track of the set of putative leaves as well as the number of
365  // paths to each leaf seen so far.
366  typedef SmallMap<Value*, unsigned, 8> LeafMap;
367  LeafMap Leaves; // Leaf -> Total weight so far.
368  SmallVector<Value*, 8> LeafOrder; // Ensure deterministic leaf output order.
369
370#ifndef NDEBUG
371  SmallPtrSet<Value*, 8> Visited; // For sanity checking the iteration scheme.
372#endif
373  while (!Worklist.empty()) {
374    std::pair<BinaryOperator*, unsigned> P = Worklist.pop_back_val();
375    I = P.first; // We examine the operands of this binary operator.
376    assert(P.second >= 1 && "No paths to here, so how did we get here?!");
377
378    for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands.
379      Value *Op = I->getOperand(OpIdx);
380      unsigned Weight = P.second; // Number of paths to this operand.
381      DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
382      assert(!Op->use_empty() && "No uses, so how did we get to it?!");
383
384      // If this is a binary operation of the right kind with only one use then
385      // add its operands to the expression.
386      if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
387        assert(Visited.insert(Op) && "Not first visit!");
388        DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
389        Worklist.push_back(std::make_pair(BO, Weight));
390        continue;
391      }
392
393      // Appears to be a leaf.  Is the operand already in the set of leaves?
394      LeafMap::iterator It = Leaves.find(Op);
395      if (It == Leaves.end()) {
396        // Not in the leaf map.  Must be the first time we saw this operand.
397        assert(Visited.insert(Op) && "Not first visit!");
398        if (!Op->hasOneUse()) {
399          // This value has uses not accounted for by the expression, so it is
400          // not safe to modify.  Mark it as being a leaf.
401          DEBUG(dbgs() << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
402          LeafOrder.push_back(Op);
403          Leaves[Op] = Weight;
404          continue;
405        }
406        // No uses outside the expression, try morphing it.
407      } else if (It != Leaves.end()) {
408        // Already in the leaf map.
409        assert(Visited.count(Op) && "In leaf map but not visited!");
410
411        // Update the number of paths to the leaf.
412        It->second += Weight;
413
414        // The leaf already has one use from inside the expression.  As we want
415        // exactly one such use, drop this new use of the leaf.
416        assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
417        I->setOperand(OpIdx, UndefValue::get(I->getType()));
418        MadeChange = true;
419
420        // If the leaf is a binary operation of the right kind and we now see
421        // that its multiple original uses were in fact all by nodes belonging
422        // to the expression, then no longer consider it to be a leaf and add
423        // its operands to the expression.
424        if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
425          DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
426          Worklist.push_back(std::make_pair(BO, It->second));
427          Leaves.erase(It);
428          continue;
429        }
430
431        // If we still have uses that are not accounted for by the expression
432        // then it is not safe to modify the value.
433        if (!Op->hasOneUse())
434          continue;
435
436        // No uses outside the expression, try morphing it.
437        Weight = It->second;
438        Leaves.erase(It); // Since the value may be morphed below.
439      }
440
441      // At this point we have a value which, first of all, is not a binary
442      // expression of the right kind, and secondly, is only used inside the
443      // expression.  This means that it can safely be modified.  See if we
444      // can usefully morph it into an expression of the right kind.
445      assert((!isa<Instruction>(Op) ||
446              cast<Instruction>(Op)->getOpcode() != Opcode) &&
447             "Should have been handled above!");
448      assert(Op->hasOneUse() && "Has uses outside the expression tree!");
449
450      // If this is a multiply expression, turn any internal negations into
451      // multiplies by -1 so they can be reassociated.
452      BinaryOperator *BO = dyn_cast<BinaryOperator>(Op);
453      if (Opcode == Instruction::Mul && BO && BinaryOperator::isNeg(BO)) {
454        DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
455        BO = LowerNegateToMultiply(BO, ValueRankMap);
456        DEBUG(dbgs() << *BO << 'n');
457        Worklist.push_back(std::make_pair(BO, Weight));
458        MadeChange = true;
459        continue;
460      }
461
462      // Failed to morph into an expression of the right type.  This really is
463      // a leaf.
464      DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
465      assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
466      LeafOrder.push_back(Op);
467      Leaves[Op] = Weight;
468    }
469  }
470
471  // The leaves, repeated according to their weights, represent the linearized
472  // form of the expression.
473  for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
474    Value *V = LeafOrder[i];
475    LeafMap::iterator It = Leaves.find(V);
476    if (It == Leaves.end())
477      // Leaf already output, or node initially thought to be a leaf wasn't.
478      continue;
479    assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
480    unsigned Weight = It->second;
481    assert(Weight > 0 && "No paths to this value!");
482    // FIXME: Rather than repeating values Weight times, use a vector of
483    // (ValueEntry, multiplicity) pairs.
484    Ops.append(Weight, ValueEntry(getRank(V), V));
485    // Ensure the leaf is only output once.
486    Leaves.erase(It);
487  }
488}
489
490// RewriteExprTree - Now that the operands for this expression tree are
491// linearized and optimized, emit them in-order.
492void Reassociate::RewriteExprTree(BinaryOperator *I,
493                                  SmallVectorImpl<ValueEntry> &Ops) {
494  assert(Ops.size() > 1 && "Single values should be used directly!");
495
496  // Since our optimizations never increase the number of operations, the new
497  // expression can always be written by reusing the existing binary operators
498  // from the original expression tree, without creating any new instructions,
499  // though the rewritten expression may have a completely different topology.
500  // We take care to not change anything if the new expression will be the same
501  // as the original.  If more than trivial changes (like commuting operands)
502  // were made then we are obliged to clear out any optional subclass data like
503  // nsw flags.
504
505  /// NodesToRewrite - Nodes from the original expression available for writing
506  /// the new expression into.
507  SmallVector<BinaryOperator*, 8> NodesToRewrite;
508  unsigned Opcode = I->getOpcode();
509  NodesToRewrite.push_back(I);
510
511  // ExpressionChanged - Whether the rewritten expression differs non-trivially
512  // from the original, requiring the clearing of all optional flags.
513  bool ExpressionChanged = false;
514  BinaryOperator *Previous;
515  BinaryOperator *Op = 0;
516  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
517    assert(!NodesToRewrite.empty() &&
518           "Optimized expressions has more nodes than original!");
519    Previous = Op; Op = NodesToRewrite.pop_back_val();
520    // Compactify the tree instructions together with each other to guarantee
521    // that the expression tree is dominated by all of Ops.
522    if (Previous)
523      Op->moveBefore(Previous);
524
525    // The last operation (which comes earliest in the IR) is special as both
526    // operands will come from Ops, rather than just one with the other being
527    // a subexpression.
528    if (i+2 == Ops.size()) {
529      Value *NewLHS = Ops[i].Op;
530      Value *NewRHS = Ops[i+1].Op;
531      Value *OldLHS = Op->getOperand(0);
532      Value *OldRHS = Op->getOperand(1);
533
534      if (NewLHS == OldLHS && NewRHS == OldRHS)
535        // Nothing changed, leave it alone.
536        break;
537
538      if (NewLHS == OldRHS && NewRHS == OldLHS) {
539        // The order of the operands was reversed.  Swap them.
540        DEBUG(dbgs() << "RA: " << *Op << '\n');
541        Op->swapOperands();
542        DEBUG(dbgs() << "TO: " << *Op << '\n');
543        MadeChange = true;
544        ++NumChanged;
545        break;
546      }
547
548      // The new operation differs non-trivially from the original. Overwrite
549      // the old operands with the new ones.
550      DEBUG(dbgs() << "RA: " << *Op << '\n');
551      if (NewLHS != OldLHS) {
552        if (BinaryOperator *BO = isReassociableOp(OldLHS, Opcode))
553          NodesToRewrite.push_back(BO);
554        Op->setOperand(0, NewLHS);
555      }
556      if (NewRHS != OldRHS) {
557        if (BinaryOperator *BO = isReassociableOp(OldRHS, Opcode))
558          NodesToRewrite.push_back(BO);
559        Op->setOperand(1, NewRHS);
560      }
561      DEBUG(dbgs() << "TO: " << *Op << '\n');
562
563      ExpressionChanged = true;
564      MadeChange = true;
565      ++NumChanged;
566
567      break;
568    }
569
570    // Not the last operation.  The left-hand side will be a sub-expression
571    // while the right-hand side will be the current element of Ops.
572    Value *NewRHS = Ops[i].Op;
573    if (NewRHS != Op->getOperand(1)) {
574      DEBUG(dbgs() << "RA: " << *Op << '\n');
575      if (NewRHS == Op->getOperand(0)) {
576        // The new right-hand side was already present as the left operand.  If
577        // we are lucky then swapping the operands will sort out both of them.
578        Op->swapOperands();
579      } else {
580        // Overwrite with the new right-hand side.
581        if (BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode))
582          NodesToRewrite.push_back(BO);
583        Op->setOperand(1, NewRHS);
584        ExpressionChanged = true;
585      }
586      DEBUG(dbgs() << "TO: " << *Op << '\n');
587      MadeChange = true;
588      ++NumChanged;
589    }
590
591    // Now deal with the left-hand side.  If this is already an operation node
592    // from the original expression then just rewrite the rest of the expression
593    // into it.
594    if (BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode)) {
595      NodesToRewrite.push_back(BO);
596      continue;
597    }
598
599    // Otherwise, grab a spare node from the original expression and use that as
600    // the left-hand side.
601    assert(!NodesToRewrite.empty() &&
602           "Optimized expressions has more nodes than original!");
603    DEBUG(dbgs() << "RA: " << *Op << '\n');
604    Op->setOperand(0, NodesToRewrite.back());
605    DEBUG(dbgs() << "TO: " << *Op << '\n');
606    ExpressionChanged = true;
607    MadeChange = true;
608    ++NumChanged;
609  }
610
611  // If the expression changed non-trivially then clear out all subclass data in
612  // the entire rewritten expression.
613  if (ExpressionChanged) {
614    do {
615      Op->clearSubclassOptionalData();
616      if (Op == I)
617        break;
618      Op = cast<BinaryOperator>(*Op->use_begin());
619    } while (1);
620  }
621
622  // Throw away any left over nodes from the original expression.
623  for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
624    RemoveDeadBinaryOp(NodesToRewrite[i]);
625}
626
627/// NegateValue - Insert instructions before the instruction pointed to by BI,
628/// that computes the negative version of the value specified.  The negative
629/// version of the value is returned, and BI is left pointing at the instruction
630/// that should be processed next by the reassociation pass.
631static Value *NegateValue(Value *V, Instruction *BI) {
632  if (Constant *C = dyn_cast<Constant>(V))
633    return ConstantExpr::getNeg(C);
634
635  // We are trying to expose opportunity for reassociation.  One of the things
636  // that we want to do to achieve this is to push a negation as deep into an
637  // expression chain as possible, to expose the add instructions.  In practice,
638  // this means that we turn this:
639  //   X = -(A+12+C+D)   into    X = -A + -12 + -C + -D = -12 + -A + -C + -D
640  // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
641  // the constants.  We assume that instcombine will clean up the mess later if
642  // we introduce tons of unnecessary negation instructions.
643  //
644  if (BinaryOperator *I = isReassociableOp(V, Instruction::Add)) {
645    // Push the negates through the add.
646    I->setOperand(0, NegateValue(I->getOperand(0), BI));
647    I->setOperand(1, NegateValue(I->getOperand(1), BI));
648
649    // We must move the add instruction here, because the neg instructions do
650    // not dominate the old add instruction in general.  By moving it, we are
651    // assured that the neg instructions we just inserted dominate the
652    // instruction we are about to insert after them.
653    //
654    I->moveBefore(BI);
655    I->setName(I->getName()+".neg");
656    return I;
657  }
658
659  // Okay, we need to materialize a negated version of V with an instruction.
660  // Scan the use lists of V to see if we have one already.
661  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
662    User *U = *UI;
663    if (!BinaryOperator::isNeg(U)) continue;
664
665    // We found one!  Now we have to make sure that the definition dominates
666    // this use.  We do this by moving it to the entry block (if it is a
667    // non-instruction value) or right after the definition.  These negates will
668    // be zapped by reassociate later, so we don't need much finesse here.
669    BinaryOperator *TheNeg = cast<BinaryOperator>(U);
670
671    // Verify that the negate is in this function, V might be a constant expr.
672    if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
673      continue;
674
675    BasicBlock::iterator InsertPt;
676    if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
677      if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
678        InsertPt = II->getNormalDest()->begin();
679      } else {
680        InsertPt = InstInput;
681        ++InsertPt;
682      }
683      while (isa<PHINode>(InsertPt)) ++InsertPt;
684    } else {
685      InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
686    }
687    TheNeg->moveBefore(InsertPt);
688    return TheNeg;
689  }
690
691  // Insert a 'neg' instruction that subtracts the value from zero to get the
692  // negation.
693  return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
694}
695
696/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
697/// X-Y into (X + -Y).
698static bool ShouldBreakUpSubtract(Instruction *Sub) {
699  // If this is a negation, we can't split it up!
700  if (BinaryOperator::isNeg(Sub))
701    return false;
702
703  // Don't bother to break this up unless either the LHS is an associable add or
704  // subtract or if this is only used by one.
705  if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
706      isReassociableOp(Sub->getOperand(0), Instruction::Sub))
707    return true;
708  if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
709      isReassociableOp(Sub->getOperand(1), Instruction::Sub))
710    return true;
711  if (Sub->hasOneUse() &&
712      (isReassociableOp(Sub->use_back(), Instruction::Add) ||
713       isReassociableOp(Sub->use_back(), Instruction::Sub)))
714    return true;
715
716  return false;
717}
718
719/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
720/// only used by an add, transform this into (X+(0-Y)) to promote better
721/// reassociation.
722static Instruction *BreakUpSubtract(Instruction *Sub,
723                         DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
724  // Convert a subtract into an add and a neg instruction. This allows sub
725  // instructions to be commuted with other add instructions.
726  //
727  // Calculate the negative value of Operand 1 of the sub instruction,
728  // and set it as the RHS of the add instruction we just made.
729  //
730  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
731  Instruction *New =
732    BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
733  New->takeName(Sub);
734
735  // Everyone now refers to the add instruction.
736  ValueRankMap.erase(Sub);
737  Sub->replaceAllUsesWith(New);
738  New->setDebugLoc(Sub->getDebugLoc());
739  Sub->eraseFromParent();
740
741  DEBUG(dbgs() << "Negated: " << *New << '\n');
742  return New;
743}
744
745/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
746/// by one, change this into a multiply by a constant to assist with further
747/// reassociation.
748static Instruction *ConvertShiftToMul(Instruction *Shl,
749                         DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
750  // If an operand of this shift is a reassociable multiply, or if the shift
751  // is used by a reassociable multiply or add, turn into a multiply.
752  if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
753      (Shl->hasOneUse() &&
754       (isReassociableOp(Shl->use_back(), Instruction::Mul) ||
755        isReassociableOp(Shl->use_back(), Instruction::Add)))) {
756    Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
757    MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
758
759    Instruction *Mul =
760      BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
761    ValueRankMap.erase(Shl);
762    Mul->takeName(Shl);
763    Shl->replaceAllUsesWith(Mul);
764    Mul->setDebugLoc(Shl->getDebugLoc());
765    Shl->eraseFromParent();
766    return Mul;
767  }
768  return 0;
769}
770
771/// FindInOperandList - Scan backwards and forwards among values with the same
772/// rank as element i to see if X exists.  If X does not exist, return i.  This
773/// is useful when scanning for 'x' when we see '-x' because they both get the
774/// same rank.
775static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
776                                  Value *X) {
777  unsigned XRank = Ops[i].Rank;
778  unsigned e = Ops.size();
779  for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j)
780    if (Ops[j].Op == X)
781      return j;
782  // Scan backwards.
783  for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j)
784    if (Ops[j].Op == X)
785      return j;
786  return i;
787}
788
789/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
790/// and returning the result.  Insert the tree before I.
791static Value *EmitAddTreeOfValues(Instruction *I,
792                                  SmallVectorImpl<WeakVH> &Ops){
793  if (Ops.size() == 1) return Ops.back();
794
795  Value *V1 = Ops.back();
796  Ops.pop_back();
797  Value *V2 = EmitAddTreeOfValues(I, Ops);
798  return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
799}
800
801/// RemoveFactorFromExpression - If V is an expression tree that is a
802/// multiplication sequence, and if this sequence contains a multiply by Factor,
803/// remove Factor from the tree and return the new tree.
804Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
805  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
806  if (!BO) return 0;
807
808  SmallVector<ValueEntry, 8> Factors;
809  LinearizeExprTree(BO, Factors);
810
811  bool FoundFactor = false;
812  bool NeedsNegate = false;
813  for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
814    if (Factors[i].Op == Factor) {
815      FoundFactor = true;
816      Factors.erase(Factors.begin()+i);
817      break;
818    }
819
820    // If this is a negative version of this factor, remove it.
821    if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
822      if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
823        if (FC1->getValue() == -FC2->getValue()) {
824          FoundFactor = NeedsNegate = true;
825          Factors.erase(Factors.begin()+i);
826          break;
827        }
828  }
829
830  if (!FoundFactor) {
831    // Make sure to restore the operands to the expression tree.
832    RewriteExprTree(BO, Factors);
833    return 0;
834  }
835
836  BasicBlock::iterator InsertPt = BO; ++InsertPt;
837
838  // If this was just a single multiply, remove the multiply and return the only
839  // remaining operand.
840  if (Factors.size() == 1) {
841    RemoveDeadBinaryOp(BO);
842    V = Factors[0].Op;
843  } else {
844    RewriteExprTree(BO, Factors);
845    V = BO;
846  }
847
848  if (NeedsNegate)
849    V = BinaryOperator::CreateNeg(V, "neg", InsertPt);
850
851  return V;
852}
853
854/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
855/// add its operands as factors, otherwise add V to the list of factors.
856///
857/// Ops is the top-level list of add operands we're trying to factor.
858static void FindSingleUseMultiplyFactors(Value *V,
859                                         SmallVectorImpl<Value*> &Factors,
860                                       const SmallVectorImpl<ValueEntry> &Ops) {
861  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
862  if (!BO) {
863    Factors.push_back(V);
864    return;
865  }
866
867  // Otherwise, add the LHS and RHS to the list of factors.
868  FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops);
869  FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops);
870}
871
872/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
873/// instruction.  This optimizes based on identities.  If it can be reduced to
874/// a single Value, it is returned, otherwise the Ops list is mutated as
875/// necessary.
876static Value *OptimizeAndOrXor(unsigned Opcode,
877                               SmallVectorImpl<ValueEntry> &Ops) {
878  // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
879  // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
880  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
881    // First, check for X and ~X in the operand list.
882    assert(i < Ops.size());
883    if (BinaryOperator::isNot(Ops[i].Op)) {    // Cannot occur for ^.
884      Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
885      unsigned FoundX = FindInOperandList(Ops, i, X);
886      if (FoundX != i) {
887        if (Opcode == Instruction::And)   // ...&X&~X = 0
888          return Constant::getNullValue(X->getType());
889
890        if (Opcode == Instruction::Or)    // ...|X|~X = -1
891          return Constant::getAllOnesValue(X->getType());
892      }
893    }
894
895    // Next, check for duplicate pairs of values, which we assume are next to
896    // each other, due to our sorting criteria.
897    assert(i < Ops.size());
898    if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
899      if (Opcode == Instruction::And || Opcode == Instruction::Or) {
900        // Drop duplicate values for And and Or.
901        Ops.erase(Ops.begin()+i);
902        --i; --e;
903        ++NumAnnihil;
904        continue;
905      }
906
907      // Drop pairs of values for Xor.
908      assert(Opcode == Instruction::Xor);
909      if (e == 2)
910        return Constant::getNullValue(Ops[0].Op->getType());
911
912      // Y ^ X^X -> Y
913      Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
914      i -= 1; e -= 2;
915      ++NumAnnihil;
916    }
917  }
918  return 0;
919}
920
921/// OptimizeAdd - Optimize a series of operands to an 'add' instruction.  This
922/// optimizes based on identities.  If it can be reduced to a single Value, it
923/// is returned, otherwise the Ops list is mutated as necessary.
924Value *Reassociate::OptimizeAdd(Instruction *I,
925                                SmallVectorImpl<ValueEntry> &Ops) {
926  // Scan the operand lists looking for X and -X pairs.  If we find any, we
927  // can simplify the expression. X+-X == 0.  While we're at it, scan for any
928  // duplicates.  We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
929  //
930  // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1".
931  //
932  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
933    Value *TheOp = Ops[i].Op;
934    // Check to see if we've seen this operand before.  If so, we factor all
935    // instances of the operand together.  Due to our sorting criteria, we know
936    // that these need to be next to each other in the vector.
937    if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
938      // Rescan the list, remove all instances of this operand from the expr.
939      unsigned NumFound = 0;
940      do {
941        Ops.erase(Ops.begin()+i);
942        ++NumFound;
943      } while (i != Ops.size() && Ops[i].Op == TheOp);
944
945      DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
946      ++NumFactor;
947
948      // Insert a new multiply.
949      Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound);
950      Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);
951
952      // Now that we have inserted a multiply, optimize it. This allows us to
953      // handle cases that require multiple factoring steps, such as this:
954      // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
955      RedoInsts.push_back(Mul);
956
957      // If every add operand was a duplicate, return the multiply.
958      if (Ops.empty())
959        return Mul;
960
961      // Otherwise, we had some input that didn't have the dupe, such as
962      // "A + A + B" -> "A*2 + B".  Add the new multiply to the list of
963      // things being added by this operation.
964      Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
965
966      --i;
967      e = Ops.size();
968      continue;
969    }
970
971    // Check for X and -X in the operand list.
972    if (!BinaryOperator::isNeg(TheOp))
973      continue;
974
975    Value *X = BinaryOperator::getNegArgument(TheOp);
976    unsigned FoundX = FindInOperandList(Ops, i, X);
977    if (FoundX == i)
978      continue;
979
980    // Remove X and -X from the operand list.
981    if (Ops.size() == 2)
982      return Constant::getNullValue(X->getType());
983
984    Ops.erase(Ops.begin()+i);
985    if (i < FoundX)
986      --FoundX;
987    else
988      --i;   // Need to back up an extra one.
989    Ops.erase(Ops.begin()+FoundX);
990    ++NumAnnihil;
991    --i;     // Revisit element.
992    e -= 2;  // Removed two elements.
993  }
994
995  // Scan the operand list, checking to see if there are any common factors
996  // between operands.  Consider something like A*A+A*B*C+D.  We would like to
997  // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
998  // To efficiently find this, we count the number of times a factor occurs
999  // for any ADD operands that are MULs.
1000  DenseMap<Value*, unsigned> FactorOccurrences;
1001
1002  // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
1003  // where they are actually the same multiply.
1004  unsigned MaxOcc = 0;
1005  Value *MaxOccVal = 0;
1006  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1007    BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
1008    if (!BOp)
1009      continue;
1010
1011    // Compute all of the factors of this added value.
1012    SmallVector<Value*, 8> Factors;
1013    FindSingleUseMultiplyFactors(BOp, Factors, Ops);
1014    assert(Factors.size() > 1 && "Bad linearize!");
1015
1016    // Add one to FactorOccurrences for each unique factor in this op.
1017    SmallPtrSet<Value*, 8> Duplicates;
1018    for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
1019      Value *Factor = Factors[i];
1020      if (!Duplicates.insert(Factor)) continue;
1021
1022      unsigned Occ = ++FactorOccurrences[Factor];
1023      if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
1024
1025      // If Factor is a negative constant, add the negated value as a factor
1026      // because we can percolate the negate out.  Watch for minint, which
1027      // cannot be positivified.
1028      if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
1029        if (CI->isNegative() && !CI->isMinValue(true)) {
1030          Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
1031          assert(!Duplicates.count(Factor) &&
1032                 "Shouldn't have two constant factors, missed a canonicalize");
1033
1034          unsigned Occ = ++FactorOccurrences[Factor];
1035          if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
1036        }
1037    }
1038  }
1039
1040  // If any factor occurred more than one time, we can pull it out.
1041  if (MaxOcc > 1) {
1042    DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
1043    ++NumFactor;
1044
1045    // Create a new instruction that uses the MaxOccVal twice.  If we don't do
1046    // this, we could otherwise run into situations where removing a factor
1047    // from an expression will drop a use of maxocc, and this can cause
1048    // RemoveFactorFromExpression on successive values to behave differently.
1049    Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
1050    SmallVector<WeakVH, 4> NewMulOps;
1051    for (unsigned i = 0; i != Ops.size(); ++i) {
1052      // Only try to remove factors from expressions we're allowed to.
1053      BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
1054      if (!BOp)
1055        continue;
1056
1057      if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
1058        // The factorized operand may occur several times.  Convert them all in
1059        // one fell swoop.
1060        for (unsigned j = Ops.size(); j != i;) {
1061          --j;
1062          if (Ops[j].Op == Ops[i].Op) {
1063            NewMulOps.push_back(V);
1064            Ops.erase(Ops.begin()+j);
1065          }
1066        }
1067        --i;
1068      }
1069    }
1070
1071    // No need for extra uses anymore.
1072    delete DummyInst;
1073
1074    unsigned NumAddedValues = NewMulOps.size();
1075    Value *V = EmitAddTreeOfValues(I, NewMulOps);
1076
1077    // Now that we have inserted the add tree, optimize it. This allows us to
1078    // handle cases that require multiple factoring steps, such as this:
1079    // A*A*B + A*A*C   -->   A*(A*B+A*C)   -->   A*(A*(B+C))
1080    assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
1081    (void)NumAddedValues;
1082    RedoInsts.push_back(V);
1083
1084    // Create the multiply.
1085    Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
1086
1087    // Rerun associate on the multiply in case the inner expression turned into
1088    // a multiply.  We want to make sure that we keep things in canonical form.
1089    RedoInsts.push_back(V2);
1090
1091    // If every add operand included the factor (e.g. "A*B + A*C"), then the
1092    // entire result expression is just the multiply "A*(B+C)".
1093    if (Ops.empty())
1094      return V2;
1095
1096    // Otherwise, we had some input that didn't have the factor, such as
1097    // "A*B + A*C + D" -> "A*(B+C) + D".  Add the new multiply to the list of
1098    // things being added by this operation.
1099    Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
1100  }
1101
1102  return 0;
1103}
1104
1105namespace {
1106  /// \brief Predicate tests whether a ValueEntry's op is in a map.
1107  struct IsValueInMap {
1108    const DenseMap<Value *, unsigned> &Map;
1109
1110    IsValueInMap(const DenseMap<Value *, unsigned> &Map) : Map(Map) {}
1111
1112    bool operator()(const ValueEntry &Entry) {
1113      return Map.find(Entry.Op) != Map.end();
1114    }
1115  };
1116}
1117
1118/// \brief Build up a vector of value/power pairs factoring a product.
1119///
1120/// Given a series of multiplication operands, build a vector of factors and
1121/// the powers each is raised to when forming the final product. Sort them in
1122/// the order of descending power.
1123///
1124///      (x*x)          -> [(x, 2)]
1125///     ((x*x)*x)       -> [(x, 3)]
1126///   ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
1127///
1128/// \returns Whether any factors have a power greater than one.
1129bool Reassociate::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
1130                                         SmallVectorImpl<Factor> &Factors) {
1131  // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1132  // Compute the sum of powers of simplifiable factors.
1133  unsigned FactorPowerSum = 0;
1134  for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
1135    Value *Op = Ops[Idx-1].Op;
1136
1137    // Count the number of occurrences of this value.
1138    unsigned Count = 1;
1139    for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
1140      ++Count;
1141    // Track for simplification all factors which occur 2 or more times.
1142    if (Count > 1)
1143      FactorPowerSum += Count;
1144  }
1145
1146  // We can only simplify factors if the sum of the powers of our simplifiable
1147  // factors is 4 or higher. When that is the case, we will *always* have
1148  // a simplification. This is an important invariant to prevent cyclicly
1149  // trying to simplify already minimal formations.
1150  if (FactorPowerSum < 4)
1151    return false;
1152
1153  // Now gather the simplifiable factors, removing them from Ops.
1154  FactorPowerSum = 0;
1155  for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
1156    Value *Op = Ops[Idx-1].Op;
1157
1158    // Count the number of occurrences of this value.
1159    unsigned Count = 1;
1160    for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
1161      ++Count;
1162    if (Count == 1)
1163      continue;
1164    // Move an even number of occurences to Factors.
1165    Count &= ~1U;
1166    Idx -= Count;
1167    FactorPowerSum += Count;
1168    Factors.push_back(Factor(Op, Count));
1169    Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
1170  }
1171
1172  // None of the adjustments above should have reduced the sum of factor powers
1173  // below our mininum of '4'.
1174  assert(FactorPowerSum >= 4);
1175
1176  std::sort(Factors.begin(), Factors.end(), Factor::PowerDescendingSorter());
1177  return true;
1178}
1179
1180/// \brief Build a tree of multiplies, computing the product of Ops.
1181static Value *buildMultiplyTree(IRBuilder<> &Builder,
1182                                SmallVectorImpl<Value*> &Ops) {
1183  if (Ops.size() == 1)
1184    return Ops.back();
1185
1186  Value *LHS = Ops.pop_back_val();
1187  do {
1188    LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
1189  } while (!Ops.empty());
1190
1191  return LHS;
1192}
1193
1194/// \brief Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
1195///
1196/// Given a vector of values raised to various powers, where no two values are
1197/// equal and the powers are sorted in decreasing order, compute the minimal
1198/// DAG of multiplies to compute the final product, and return that product
1199/// value.
1200Value *Reassociate::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
1201                                            SmallVectorImpl<Factor> &Factors) {
1202  assert(Factors[0].Power);
1203  SmallVector<Value *, 4> OuterProduct;
1204  for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
1205       Idx < Size && Factors[Idx].Power > 0; ++Idx) {
1206    if (Factors[Idx].Power != Factors[LastIdx].Power) {
1207      LastIdx = Idx;
1208      continue;
1209    }
1210
1211    // We want to multiply across all the factors with the same power so that
1212    // we can raise them to that power as a single entity. Build a mini tree
1213    // for that.
1214    SmallVector<Value *, 4> InnerProduct;
1215    InnerProduct.push_back(Factors[LastIdx].Base);
1216    do {
1217      InnerProduct.push_back(Factors[Idx].Base);
1218      ++Idx;
1219    } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
1220
1221    // Reset the base value of the first factor to the new expression tree.
1222    // We'll remove all the factors with the same power in a second pass.
1223    Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
1224    RedoInsts.push_back(Factors[LastIdx].Base);
1225
1226    LastIdx = Idx;
1227  }
1228  // Unique factors with equal powers -- we've folded them into the first one's
1229  // base.
1230  Factors.erase(std::unique(Factors.begin(), Factors.end(),
1231                            Factor::PowerEqual()),
1232                Factors.end());
1233
1234  // Iteratively collect the base of each factor with an add power into the
1235  // outer product, and halve each power in preparation for squaring the
1236  // expression.
1237  for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) {
1238    if (Factors[Idx].Power & 1)
1239      OuterProduct.push_back(Factors[Idx].Base);
1240    Factors[Idx].Power >>= 1;
1241  }
1242  if (Factors[0].Power) {
1243    Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1244    OuterProduct.push_back(SquareRoot);
1245    OuterProduct.push_back(SquareRoot);
1246  }
1247  if (OuterProduct.size() == 1)
1248    return OuterProduct.front();
1249
1250  Value *V = buildMultiplyTree(Builder, OuterProduct);
1251  return V;
1252}
1253
1254Value *Reassociate::OptimizeMul(BinaryOperator *I,
1255                                SmallVectorImpl<ValueEntry> &Ops) {
1256  // We can only optimize the multiplies when there is a chain of more than
1257  // three, such that a balanced tree might require fewer total multiplies.
1258  if (Ops.size() < 4)
1259    return 0;
1260
1261  // Try to turn linear trees of multiplies without other uses of the
1262  // intermediate stages into minimal multiply DAGs with perfect sub-expression
1263  // re-use.
1264  SmallVector<Factor, 4> Factors;
1265  if (!collectMultiplyFactors(Ops, Factors))
1266    return 0; // All distinct factors, so nothing left for us to do.
1267
1268  IRBuilder<> Builder(I);
1269  Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1270  if (Ops.empty())
1271    return V;
1272
1273  ValueEntry NewEntry = ValueEntry(getRank(V), V);
1274  Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
1275  return 0;
1276}
1277
1278Value *Reassociate::OptimizeExpression(BinaryOperator *I,
1279                                       SmallVectorImpl<ValueEntry> &Ops) {
1280  // Now that we have the linearized expression tree, try to optimize it.
1281  // Start by folding any constants that we found.
1282  bool IterateOptimization = false;
1283  if (Ops.size() == 1) return Ops[0].Op;
1284
1285  unsigned Opcode = I->getOpcode();
1286
1287  if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
1288    if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
1289      Ops.pop_back();
1290      Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
1291      return OptimizeExpression(I, Ops);
1292    }
1293
1294  // Check for destructive annihilation due to a constant being used.
1295  if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op))
1296    switch (Opcode) {
1297    default: break;
1298    case Instruction::And:
1299      if (CstVal->isZero())                  // X & 0 -> 0
1300        return CstVal;
1301      if (CstVal->isAllOnesValue())          // X & -1 -> X
1302        Ops.pop_back();
1303      break;
1304    case Instruction::Mul:
1305      if (CstVal->isZero()) {                // X * 0 -> 0
1306        ++NumAnnihil;
1307        return CstVal;
1308      }
1309
1310      if (cast<ConstantInt>(CstVal)->isOne())
1311        Ops.pop_back();                      // X * 1 -> X
1312      break;
1313    case Instruction::Or:
1314      if (CstVal->isAllOnesValue())          // X | -1 -> -1
1315        return CstVal;
1316      // FALLTHROUGH!
1317    case Instruction::Add:
1318    case Instruction::Xor:
1319      if (CstVal->isZero())                  // X [|^+] 0 -> X
1320        Ops.pop_back();
1321      break;
1322    }
1323  if (Ops.size() == 1) return Ops[0].Op;
1324
1325  // Handle destructive annihilation due to identities between elements in the
1326  // argument list here.
1327  unsigned NumOps = Ops.size();
1328  switch (Opcode) {
1329  default: break;
1330  case Instruction::And:
1331  case Instruction::Or:
1332  case Instruction::Xor:
1333    if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
1334      return Result;
1335    break;
1336
1337  case Instruction::Add:
1338    if (Value *Result = OptimizeAdd(I, Ops))
1339      return Result;
1340    break;
1341
1342  case Instruction::Mul:
1343    if (Value *Result = OptimizeMul(I, Ops))
1344      return Result;
1345    break;
1346  }
1347
1348  if (IterateOptimization || Ops.size() != NumOps)
1349    return OptimizeExpression(I, Ops);
1350  return 0;
1351}
1352
1353/// ReassociateInst - Inspect and reassociate the instruction at the
1354/// given position, post-incrementing the position.
1355void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) {
1356  Instruction *BI = BBI++;
1357  if (BI->getOpcode() == Instruction::Shl &&
1358      isa<ConstantInt>(BI->getOperand(1)))
1359    if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
1360      MadeChange = true;
1361      BI = NI;
1362    }
1363
1364  // Floating point binary operators are not associative, but we can still
1365  // commute (some) of them, to canonicalize the order of their operands.
1366  // This can potentially expose more CSE opportunities, and makes writing
1367  // other transformations simpler.
1368  if (isa<BinaryOperator>(BI) &&
1369      (BI->getType()->isFloatingPointTy() || BI->getType()->isVectorTy())) {
1370    // FAdd and FMul can be commuted.
1371    if (BI->getOpcode() != Instruction::FMul &&
1372        BI->getOpcode() != Instruction::FAdd)
1373      return;
1374
1375    Value *LHS = BI->getOperand(0);
1376    Value *RHS = BI->getOperand(1);
1377    unsigned LHSRank = getRank(LHS);
1378    unsigned RHSRank = getRank(RHS);
1379
1380    // Sort the operands by rank.
1381    if (RHSRank < LHSRank) {
1382      BI->setOperand(0, RHS);
1383      BI->setOperand(1, LHS);
1384    }
1385
1386    return;
1387  }
1388
1389  // Do not reassociate operations that we do not understand.
1390  if (!isa<BinaryOperator>(BI))
1391    return;
1392
1393  // Do not reassociate boolean (i1) expressions.  We want to preserve the
1394  // original order of evaluation for short-circuited comparisons that
1395  // SimplifyCFG has folded to AND/OR expressions.  If the expression
1396  // is not further optimized, it is likely to be transformed back to a
1397  // short-circuited form for code gen, and the source order may have been
1398  // optimized for the most likely conditions.
1399  if (BI->getType()->isIntegerTy(1))
1400    return;
1401
1402  // If this is a subtract instruction which is not already in negate form,
1403  // see if we can convert it to X+-Y.
1404  if (BI->getOpcode() == Instruction::Sub) {
1405    if (ShouldBreakUpSubtract(BI)) {
1406      BI = BreakUpSubtract(BI, ValueRankMap);
1407      // Reset the BBI iterator in case BreakUpSubtract changed the
1408      // instruction it points to.
1409      BBI = BI;
1410      ++BBI;
1411      MadeChange = true;
1412    } else if (BinaryOperator::isNeg(BI)) {
1413      // Otherwise, this is a negation.  See if the operand is a multiply tree
1414      // and if this is not an inner node of a multiply tree.
1415      if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
1416          (!BI->hasOneUse() ||
1417           !isReassociableOp(BI->use_back(), Instruction::Mul))) {
1418        BI = LowerNegateToMultiply(BI, ValueRankMap);
1419        MadeChange = true;
1420      }
1421    }
1422  }
1423
1424  // If this instruction is a commutative binary operator, process it.
1425  if (!BI->isAssociative()) return;
1426  BinaryOperator *I = cast<BinaryOperator>(BI);
1427
1428  // If this is an interior node of a reassociable tree, ignore it until we
1429  // get to the root of the tree, to avoid N^2 analysis.
1430  if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
1431    return;
1432
1433  // If this is an add tree that is used by a sub instruction, ignore it
1434  // until we process the subtract.
1435  if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
1436      cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
1437    return;
1438
1439  ReassociateExpression(I);
1440}
1441
1442Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
1443
1444  // First, walk the expression tree, linearizing the tree, collecting the
1445  // operand information.
1446  SmallVector<ValueEntry, 8> Ops;
1447  LinearizeExprTree(I, Ops);
1448
1449  // Now that we have linearized the tree to a list and have gathered all of
1450  // the operands and their ranks, sort the operands by their rank.  Use a
1451  // stable_sort so that values with equal ranks will have their relative
1452  // positions maintained (and so the compiler is deterministic).  Note that
1453  // this sorts so that the highest ranking values end up at the beginning of
1454  // the vector.
1455  std::stable_sort(Ops.begin(), Ops.end());
1456
1457  DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
1458
1459  // OptimizeExpression - Now that we have the expression tree in a convenient
1460  // sorted form, optimize it globally if possible.
1461  if (Value *V = OptimizeExpression(I, Ops)) {
1462    // This expression tree simplified to something that isn't a tree,
1463    // eliminate it.
1464    DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
1465    I->replaceAllUsesWith(V);
1466    if (Instruction *VI = dyn_cast<Instruction>(V))
1467      VI->setDebugLoc(I->getDebugLoc());
1468    RemoveDeadBinaryOp(I);
1469    ++NumAnnihil;
1470    return V;
1471  }
1472
1473  // We want to sink immediates as deeply as possible except in the case where
1474  // this is a multiply tree used only by an add, and the immediate is a -1.
1475  // In this case we reassociate to put the negation on the outside so that we
1476  // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
1477  if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
1478      cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
1479      isa<ConstantInt>(Ops.back().Op) &&
1480      cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
1481    ValueEntry Tmp = Ops.pop_back_val();
1482    Ops.insert(Ops.begin(), Tmp);
1483  }
1484
1485  DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
1486
1487  if (Ops.size() == 1) {
1488    // This expression tree simplified to something that isn't a tree,
1489    // eliminate it.
1490    I->replaceAllUsesWith(Ops[0].Op);
1491    if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
1492      OI->setDebugLoc(I->getDebugLoc());
1493    RemoveDeadBinaryOp(I);
1494    return Ops[0].Op;
1495  }
1496
1497  // Now that we ordered and optimized the expressions, splat them back into
1498  // the expression tree, removing any unneeded nodes.
1499  RewriteExprTree(I, Ops);
1500  return I;
1501}
1502
1503bool Reassociate::runOnFunction(Function &F) {
1504  // Recalculate the rank map for F
1505  BuildRankMap(F);
1506
1507  MadeChange = false;
1508  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
1509    for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); )
1510      ReassociateInst(BBI);
1511
1512  // Now that we're done, revisit any instructions which are likely to
1513  // have secondary reassociation opportunities.
1514  while (!RedoInsts.empty())
1515    if (Value *V = RedoInsts.pop_back_val()) {
1516      BasicBlock::iterator BBI = cast<Instruction>(V);
1517      ReassociateInst(BBI);
1518    }
1519
1520  // We are done with the rank map.
1521  RankMap.clear();
1522  ValueRankMap.clear();
1523
1524  // Now that we're done, delete any instructions which are no longer used.
1525  while (!DeadInsts.empty())
1526    if (Value *V = DeadInsts.pop_back_val())
1527      RecursivelyDeleteTriviallyDeadInstructions(V);
1528
1529  return MadeChange;
1530}
1531