LoopUnswitch.cpp revision 28aa5de050e0f43617f2b5cce65edc8827f3e2f1
1//===-- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass transforms loops that contain branches on loop-invariant conditions
11// to have multiple loops.  For example, it turns the left into the right code:
12//
13//  for (...)                  if (lic)
14//    A                          for (...)
15//    if (lic)                     A; B; C
16//      B                      else
17//    C                          for (...)
18//                                 A; C
19//
20// This can increase the size of the code exponentially (doubling it every time
21// a loop is unswitched) so we only unswitch if the resultant code will be
22// smaller than a threshold.
23//
24// This pass expects LICM to be run before it to hoist invariant conditions out
25// of the loop, to make the unswitching opportunity obvious.
26//
27//===----------------------------------------------------------------------===//
28
29#define DEBUG_TYPE "loop-unswitch"
30#include "llvm/Transforms/Scalar.h"
31#include "llvm/Constants.h"
32#include "llvm/DerivedTypes.h"
33#include "llvm/Function.h"
34#include "llvm/Instructions.h"
35#include "llvm/Analysis/ConstantFolding.h"
36#include "llvm/Analysis/LoopInfo.h"
37#include "llvm/Analysis/LoopPass.h"
38#include "llvm/Analysis/Dominators.h"
39#include "llvm/Transforms/Utils/Cloning.h"
40#include "llvm/Transforms/Utils/Local.h"
41#include "llvm/Transforms/Utils/BasicBlockUtils.h"
42#include "llvm/ADT/Statistic.h"
43#include "llvm/ADT/SmallPtrSet.h"
44#include "llvm/ADT/STLExtras.h"
45#include "llvm/Support/CommandLine.h"
46#include "llvm/Support/Compiler.h"
47#include "llvm/Support/Debug.h"
48#include <algorithm>
49#include <set>
50using namespace llvm;
51
52STATISTIC(NumBranches, "Number of branches unswitched");
53STATISTIC(NumSwitches, "Number of switches unswitched");
54STATISTIC(NumSelects , "Number of selects unswitched");
55STATISTIC(NumTrivial , "Number of unswitches that are trivial");
56STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
57
58static cl::opt<unsigned>
59Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
60          cl::init(10), cl::Hidden);
61
62namespace {
63  class VISIBILITY_HIDDEN LoopUnswitch : public LoopPass {
64    LoopInfo *LI;  // Loop information
65    LPPassManager *LPM;
66
67    // LoopProcessWorklist - Used to check if second loop needs processing
68    // after RewriteLoopBodyWithConditionConstant rewrites first loop.
69    std::vector<Loop*> LoopProcessWorklist;
70    SmallPtrSet<Value *,8> UnswitchedVals;
71
72    bool OptimizeForSize;
73    bool redoLoop;
74
75    Loop *currentLoop;
76    DominanceFrontier *DF;
77    DominatorTree *DT;
78    BasicBlock *loopHeader;
79    BasicBlock *loopPreheader;
80
81    // LoopBlocks contains all of the basic blocks of the loop, including the
82    // preheader of the loop, the body of the loop, and the exit blocks of the
83    // loop, in that order.
84    std::vector<BasicBlock*> LoopBlocks;
85    // NewBlocks contained cloned copy of basic blocks from LoopBlocks.
86    std::vector<BasicBlock*> NewBlocks;
87
88  public:
89    static char ID; // Pass ID, replacement for typeid
90    explicit LoopUnswitch(bool Os = false) :
91      LoopPass(&ID), OptimizeForSize(Os), redoLoop(false),
92      currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL),
93      loopPreheader(NULL) {}
94
95    bool runOnLoop(Loop *L, LPPassManager &LPM);
96    bool processCurrentLoop();
97
98    /// This transformation requires natural loop information & requires that
99    /// loop preheaders be inserted into the CFG...
100    ///
101    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
102      AU.addRequiredID(LoopSimplifyID);
103      AU.addPreservedID(LoopSimplifyID);
104      AU.addRequired<LoopInfo>();
105      AU.addPreserved<LoopInfo>();
106      AU.addRequiredID(LCSSAID);
107      AU.addPreservedID(LCSSAID);
108      AU.addPreserved<DominatorTree>();
109      AU.addPreserved<DominanceFrontier>();
110    }
111
112  private:
113
114    /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
115    /// remove it.
116    void RemoveLoopFromWorklist(Loop *L) {
117      std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(),
118                                                 LoopProcessWorklist.end(), L);
119      if (I != LoopProcessWorklist.end())
120        LoopProcessWorklist.erase(I);
121    }
122
123    void initLoopData() {
124      loopHeader = currentLoop->getHeader();
125      loopPreheader = currentLoop->getLoopPreheader();
126    }
127
128    /// Split all of the edges from inside the loop to their exit blocks.
129    /// Update the appropriate Phi nodes as we do so.
130    void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks);
131
132    bool UnswitchIfProfitable(Value *LoopCond, Constant *Val);
133    unsigned getLoopUnswitchCost(Value *LIC);
134    void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
135                                  BasicBlock *ExitBlock);
136    void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L);
137
138    void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
139                                              Constant *Val, bool isEqual);
140
141    void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
142                                        BasicBlock *TrueDest,
143                                        BasicBlock *FalseDest,
144                                        Instruction *InsertPt);
145
146    void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
147    void RemoveBlockIfDead(BasicBlock *BB,
148                           std::vector<Instruction*> &Worklist, Loop *l);
149    void RemoveLoopFromHierarchy(Loop *L);
150    bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
151                                    BasicBlock **LoopExit = 0);
152
153  };
154}
155char LoopUnswitch::ID = 0;
156static RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops");
157
158Pass *llvm::createLoopUnswitchPass(bool Os) {
159  return new LoopUnswitch(Os);
160}
161
162/// FindLIVLoopCondition - Cond is a condition that occurs in L.  If it is
163/// invariant in the loop, or has an invariant piece, return the invariant.
164/// Otherwise, return null.
165static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
166  // Constants should be folded, not unswitched on!
167  if (isa<Constant>(Cond)) return 0;
168
169  // TODO: Handle: br (VARIANT|INVARIANT).
170  // TODO: Hoist simple expressions out of loops.
171  if (L->isLoopInvariant(Cond)) return Cond;
172
173  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
174    if (BO->getOpcode() == Instruction::And ||
175        BO->getOpcode() == Instruction::Or) {
176      // If either the left or right side is invariant, we can unswitch on this,
177      // which will cause the branch to go away in one loop and the condition to
178      // simplify in the other one.
179      if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed))
180        return LHS;
181      if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed))
182        return RHS;
183    }
184
185  return 0;
186}
187
188bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
189  LI = &getAnalysis<LoopInfo>();
190  LPM = &LPM_Ref;
191  DF = getAnalysisIfAvailable<DominanceFrontier>();
192  DT = getAnalysisIfAvailable<DominatorTree>();
193  currentLoop = L;
194  Function *F = currentLoop->getHeader()->getParent();
195  bool Changed = false;
196  do {
197    assert(currentLoop->isLCSSAForm());
198    redoLoop = false;
199    Changed |= processCurrentLoop();
200  } while(redoLoop);
201
202  if (Changed) {
203    // FIXME: Reconstruct dom info, because it is not preserved properly.
204    if (DT)
205      DT->runOnFunction(*F);
206    if (DF)
207      DF->runOnFunction(*F);
208  }
209  return Changed;
210}
211
212/// processCurrentLoop - Do actual work and unswitch loop if possible
213/// and profitable.
214bool LoopUnswitch::processCurrentLoop() {
215  bool Changed = false;
216
217  // Loop over all of the basic blocks in the loop.  If we find an interior
218  // block that is branching on a loop-invariant condition, we can unswitch this
219  // loop.
220  for (Loop::block_iterator I = currentLoop->block_begin(),
221         E = currentLoop->block_end();
222       I != E; ++I) {
223    TerminatorInst *TI = (*I)->getTerminator();
224    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
225      // If this isn't branching on an invariant condition, we can't unswitch
226      // it.
227      if (BI->isConditional()) {
228        // See if this, or some part of it, is loop invariant.  If so, we can
229        // unswitch on it if we desire.
230        Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
231                                               currentLoop, Changed);
232        if (LoopCond && UnswitchIfProfitable(LoopCond,
233                                             ConstantInt::getTrue())) {
234          ++NumBranches;
235          return true;
236        }
237      }
238    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
239      Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
240                                             currentLoop, Changed);
241      if (LoopCond && SI->getNumCases() > 1) {
242        // Find a value to unswitch on:
243        // FIXME: this should chose the most expensive case!
244        Constant *UnswitchVal = SI->getCaseValue(1);
245        // Do not process same value again and again.
246        if (!UnswitchedVals.insert(UnswitchVal))
247          continue;
248
249        if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
250          ++NumSwitches;
251          return true;
252        }
253      }
254    }
255
256    // Scan the instructions to check for unswitchable values.
257    for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
258         BBI != E; ++BBI)
259      if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
260        Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
261                                               currentLoop, Changed);
262        if (LoopCond && UnswitchIfProfitable(LoopCond,
263                                             ConstantInt::getTrue())) {
264          ++NumSelects;
265          return true;
266        }
267      }
268  }
269  return Changed;
270}
271
272/// isTrivialLoopExitBlock - Check to see if all paths from BB either:
273///   1. Exit the loop with no side effects.
274///   2. Branch to the latch block with no side-effects.
275///
276/// If these conditions are true, we return true and set ExitBB to the block we
277/// exit through.
278///
279static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
280                                         BasicBlock *&ExitBB,
281                                         std::set<BasicBlock*> &Visited) {
282  if (!Visited.insert(BB).second) {
283    // Already visited and Ok, end of recursion.
284    return true;
285  } else if (!L->contains(BB)) {
286    // Otherwise, this is a loop exit, this is fine so long as this is the
287    // first exit.
288    if (ExitBB != 0) return false;
289    ExitBB = BB;
290    return true;
291  }
292
293  // Otherwise, this is an unvisited intra-loop node.  Check all successors.
294  for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
295    // Check to see if the successor is a trivial loop exit.
296    if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited))
297      return false;
298  }
299
300  // Okay, everything after this looks good, check to make sure that this block
301  // doesn't include any side effects.
302  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
303    if (I->mayWriteToMemory())
304      return false;
305
306  return true;
307}
308
309/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
310/// leads to an exit from the specified loop, and has no side-effects in the
311/// process.  If so, return the block that is exited to, otherwise return null.
312static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
313  std::set<BasicBlock*> Visited;
314  Visited.insert(L->getHeader());  // Branches to header are ok.
315  BasicBlock *ExitBB = 0;
316  if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
317    return ExitBB;
318  return 0;
319}
320
321/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
322/// trivial: that is, that the condition controls whether or not the loop does
323/// anything at all.  If this is a trivial condition, unswitching produces no
324/// code duplications (equivalently, it produces a simpler loop and a new empty
325/// loop, which gets deleted).
326///
327/// If this is a trivial condition, return true, otherwise return false.  When
328/// returning true, this sets Cond and Val to the condition that controls the
329/// trivial condition: when Cond dynamically equals Val, the loop is known to
330/// exit.  Finally, this sets LoopExit to the BB that the loop exits to when
331/// Cond == Val.
332///
333bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
334                                       BasicBlock **LoopExit) {
335  BasicBlock *Header = currentLoop->getHeader();
336  TerminatorInst *HeaderTerm = Header->getTerminator();
337
338  BasicBlock *LoopExitBB = 0;
339  if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
340    // If the header block doesn't end with a conditional branch on Cond, we
341    // can't handle it.
342    if (!BI->isConditional() || BI->getCondition() != Cond)
343      return false;
344
345    // Check to see if a successor of the branch is guaranteed to go to the
346    // latch block or exit through a one exit block without having any
347    // side-effects.  If so, determine the value of Cond that causes it to do
348    // this.
349    if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
350                                             BI->getSuccessor(0)))) {
351      if (Val) *Val = ConstantInt::getTrue();
352    } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
353                                                    BI->getSuccessor(1)))) {
354      if (Val) *Val = ConstantInt::getFalse();
355    }
356  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
357    // If this isn't a switch on Cond, we can't handle it.
358    if (SI->getCondition() != Cond) return false;
359
360    // Check to see if a successor of the switch is guaranteed to go to the
361    // latch block or exit through a one exit block without having any
362    // side-effects.  If so, determine the value of Cond that causes it to do
363    // this.  Note that we can't trivially unswitch on the default case.
364    for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
365      if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
366                                               SI->getSuccessor(i)))) {
367        // Okay, we found a trivial case, remember the value that is trivial.
368        if (Val) *Val = SI->getCaseValue(i);
369        break;
370      }
371  }
372
373  // If we didn't find a single unique LoopExit block, or if the loop exit block
374  // contains phi nodes, this isn't trivial.
375  if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
376    return false;   // Can't handle this.
377
378  if (LoopExit) *LoopExit = LoopExitBB;
379
380  // We already know that nothing uses any scalar values defined inside of this
381  // loop.  As such, we just have to check to see if this loop will execute any
382  // side-effecting instructions (e.g. stores, calls, volatile loads) in the
383  // part of the loop that the code *would* execute.  We already checked the
384  // tail, check the header now.
385  for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
386    if (I->mayWriteToMemory())
387      return false;
388  return true;
389}
390
391/// getLoopUnswitchCost - Return the cost (code size growth) that will happen if
392/// we choose to unswitch current loop on the specified value.
393///
394unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) {
395  // If the condition is trivial, always unswitch.  There is no code growth for
396  // this case.
397  if (IsTrivialUnswitchCondition(LIC))
398    return 0;
399
400  // FIXME: This is really overly conservative.  However, more liberal
401  // estimations have thus far resulted in excessive unswitching, which is bad
402  // both in compile time and in code size.  This should be replaced once
403  // someone figures out how a good estimation.
404  return currentLoop->getBlocks().size();
405
406  unsigned Cost = 0;
407  // FIXME: this is brain dead.  It should take into consideration code
408  // shrinkage.
409  for (Loop::block_iterator I = currentLoop->block_begin(),
410         E = currentLoop->block_end();
411       I != E; ++I) {
412    BasicBlock *BB = *I;
413    // Do not include empty blocks in the cost calculation.  This happen due to
414    // loop canonicalization and will be removed.
415    if (BB->begin() == BasicBlock::iterator(BB->getTerminator()))
416      continue;
417
418    // Count basic blocks.
419    ++Cost;
420  }
421
422  return Cost;
423}
424
425/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
426/// LoopCond == Val to simplify the loop.  If we decide that this is profitable,
427/// unswitch the loop, reprocess the pieces, then return true.
428bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
429
430  initLoopData();
431  Function *F = loopHeader->getParent();
432
433
434  // Check to see if it would be profitable to unswitch current loop.
435  unsigned Cost = getLoopUnswitchCost(LoopCond);
436
437  // Do not do non-trivial unswitch while optimizing for size.
438  if (Cost && OptimizeForSize)
439    return false;
440  if (Cost && !F->isDeclaration() && F->hasFnAttr(Attribute::OptimizeForSize))
441    return false;
442
443  if (Cost > Threshold) {
444    // FIXME: this should estimate growth by the amount of code shared by the
445    // resultant unswitched loops.
446    //
447    DOUT << "NOT unswitching loop %"
448         << currentLoop->getHeader()->getName() << ", cost too high: "
449         << currentLoop->getBlocks().size() << "\n";
450    return false;
451  }
452
453  Constant *CondVal;
454  BasicBlock *ExitBlock;
455  if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
456    UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock);
457  } else {
458    UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
459  }
460
461  return true;
462}
463
464// RemapInstruction - Convert the instruction operands from referencing the
465// current values into those specified by ValueMap.
466//
467static inline void RemapInstruction(Instruction *I,
468                                    DenseMap<const Value *, Value*> &ValueMap) {
469  for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
470    Value *Op = I->getOperand(op);
471    DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
472    if (It != ValueMap.end()) Op = It->second;
473    I->setOperand(op, Op);
474  }
475}
476
477/// CloneLoop - Recursively clone the specified loop and all of its children,
478/// mapping the blocks with the specified map.
479static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM,
480                       LoopInfo *LI, LPPassManager *LPM) {
481  Loop *New = new Loop();
482
483  LPM->insertLoop(New, PL);
484
485  // Add all of the blocks in L to the new loop.
486  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
487       I != E; ++I)
488    if (LI->getLoopFor(*I) == L)
489      New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), LI->getBase());
490
491  // Add all of the subloops to the new loop.
492  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
493    CloneLoop(*I, New, VM, LI, LPM);
494
495  return New;
496}
497
498/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values
499/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest.  Insert the
500/// code immediately before InsertPt.
501void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
502                                                  BasicBlock *TrueDest,
503                                                  BasicBlock *FalseDest,
504                                                  Instruction *InsertPt) {
505  // Insert a conditional branch on LIC to the two preheaders.  The original
506  // code is the true version and the new code is the false version.
507  Value *BranchVal = LIC;
508  if (!isa<ConstantInt>(Val) || Val->getType() != Type::Int1Ty)
509    BranchVal = new ICmpInst(ICmpInst::ICMP_EQ, LIC, Val, "tmp", InsertPt);
510  else if (Val != ConstantInt::getTrue())
511    // We want to enter the new loop when the condition is true.
512    std::swap(TrueDest, FalseDest);
513
514  // Insert the new branch.
515  BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
516}
517
518/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
519/// condition in it (a cond branch from its header block to its latch block,
520/// where the path through the loop that doesn't execute its body has no
521/// side-effects), unswitch it.  This doesn't involve any code duplication, just
522/// moving the conditional branch outside of the loop and updating loop info.
523void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
524                                            Constant *Val,
525                                            BasicBlock *ExitBlock) {
526  DOUT << "loop-unswitch: Trivial-Unswitch loop %"
527       << loopHeader->getName() << " [" << L->getBlocks().size()
528       << " blocks] in Function " << L->getHeader()->getParent()->getName()
529       << " on cond: " << *Val << " == " << *Cond << "\n";
530
531  // First step, split the preheader, so that we know that there is a safe place
532  // to insert the conditional branch.  We will change loopPreheader to have a
533  // conditional branch on Cond.
534  BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this);
535
536  // Now that we have a place to insert the conditional branch, create a place
537  // to branch to: this is the exit block out of the loop that we should
538  // short-circuit to.
539
540  // Split this block now, so that the loop maintains its exit block, and so
541  // that the jump from the preheader can execute the contents of the exit block
542  // without actually branching to it (the exit block should be dominated by the
543  // loop header, not the preheader).
544  assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
545  BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this);
546
547  // Okay, now we have a position to branch from and a position to branch to,
548  // insert the new conditional branch.
549  EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
550                                 loopPreheader->getTerminator());
551  LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
552  loopPreheader->getTerminator()->eraseFromParent();
553
554  // We need to reprocess this loop, it could be unswitched again.
555  redoLoop = true;
556
557  // Now that we know that the loop is never entered when this condition is a
558  // particular value, rewrite the loop with this info.  We know that this will
559  // at least eliminate the old branch.
560  RewriteLoopBodyWithConditionConstant(L, Cond, Val, false);
561  ++NumTrivial;
562}
563
564/// SplitExitEdges - Split all of the edges from inside the loop to their exit
565/// blocks.  Update the appropriate Phi nodes as we do so.
566void LoopUnswitch::SplitExitEdges(Loop *L,
567                                const SmallVector<BasicBlock *, 8> &ExitBlocks)
568{
569
570  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
571    BasicBlock *ExitBlock = ExitBlocks[i];
572    std::vector<BasicBlock*> Preds(pred_begin(ExitBlock), pred_end(ExitBlock));
573
574    for (unsigned j = 0, e = Preds.size(); j != e; ++j) {
575      BasicBlock* NewExitBlock = SplitEdge(Preds[j], ExitBlock, this);
576      BasicBlock* StartBlock = Preds[j];
577      BasicBlock* EndBlock;
578      if (NewExitBlock->getSinglePredecessor() == ExitBlock) {
579        EndBlock = NewExitBlock;
580        NewExitBlock = EndBlock->getSinglePredecessor();;
581      } else {
582        EndBlock = ExitBlock;
583      }
584
585      std::set<PHINode*> InsertedPHIs;
586      PHINode* OldLCSSA = 0;
587      for (BasicBlock::iterator I = EndBlock->begin();
588           (OldLCSSA = dyn_cast<PHINode>(I)); ++I) {
589        Value* OldValue = OldLCSSA->getIncomingValueForBlock(NewExitBlock);
590        PHINode* NewLCSSA = PHINode::Create(OldLCSSA->getType(),
591                                            OldLCSSA->getName() + ".us-lcssa",
592                                            NewExitBlock->getTerminator());
593        NewLCSSA->addIncoming(OldValue, StartBlock);
594        OldLCSSA->setIncomingValue(OldLCSSA->getBasicBlockIndex(NewExitBlock),
595                                   NewLCSSA);
596        InsertedPHIs.insert(NewLCSSA);
597      }
598
599      BasicBlock::iterator InsertPt = EndBlock->getFirstNonPHI();
600      for (BasicBlock::iterator I = NewExitBlock->begin();
601         (OldLCSSA = dyn_cast<PHINode>(I)) && InsertedPHIs.count(OldLCSSA) == 0;
602         ++I) {
603        PHINode *NewLCSSA = PHINode::Create(OldLCSSA->getType(),
604                                            OldLCSSA->getName() + ".us-lcssa",
605                                            InsertPt);
606        OldLCSSA->replaceAllUsesWith(NewLCSSA);
607        NewLCSSA->addIncoming(OldLCSSA, NewExitBlock);
608      }
609
610    }
611  }
612
613}
614
615/// UnswitchNontrivialCondition - We determined that the loop is profitable
616/// to unswitch when LIC equal Val.  Split it into loop versions and test the
617/// condition outside of either loop.  Return the loops created as Out1/Out2.
618void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
619                                               Loop *L) {
620  Function *F = loopHeader->getParent();
621  DOUT << "loop-unswitch: Unswitching loop %"
622       << loopHeader->getName() << " [" << L->getBlocks().size()
623       << " blocks] in Function " << F->getName()
624       << " when '" << *Val << "' == " << *LIC << "\n";
625
626  LoopBlocks.clear();
627  NewBlocks.clear();
628
629  // First step, split the preheader and exit blocks, and add these blocks to
630  // the LoopBlocks list.
631  BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
632  LoopBlocks.push_back(NewPreheader);
633
634  // We want the loop to come after the preheader, but before the exit blocks.
635  LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
636
637  SmallVector<BasicBlock*, 8> ExitBlocks;
638  L->getUniqueExitBlocks(ExitBlocks);
639
640  // Split all of the edges from inside the loop to their exit blocks.  Update
641  // the appropriate Phi nodes as we do so.
642  SplitExitEdges(L, ExitBlocks);
643
644  // The exit blocks may have been changed due to edge splitting, recompute.
645  ExitBlocks.clear();
646  L->getUniqueExitBlocks(ExitBlocks);
647
648  // Add exit blocks to the loop blocks.
649  LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
650
651  // Next step, clone all of the basic blocks that make up the loop (including
652  // the loop preheader and exit blocks), keeping track of the mapping between
653  // the instructions and blocks.
654  NewBlocks.reserve(LoopBlocks.size());
655  DenseMap<const Value*, Value*> ValueMap;
656  for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
657    BasicBlock *New = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
658    NewBlocks.push_back(New);
659    ValueMap[LoopBlocks[i]] = New;  // Keep the BB mapping.
660    LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], New, L);
661  }
662
663  // Splice the newly inserted blocks into the function right before the
664  // original preheader.
665  F->getBasicBlockList().splice(LoopBlocks[0], F->getBasicBlockList(),
666                                NewBlocks[0], F->end());
667
668  // Now we create the new Loop object for the versioned loop.
669  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM);
670  Loop *ParentLoop = L->getParentLoop();
671  if (ParentLoop) {
672    // Make sure to add the cloned preheader and exit blocks to the parent loop
673    // as well.
674    ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
675  }
676
677  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
678    BasicBlock *NewExit = cast<BasicBlock>(ValueMap[ExitBlocks[i]]);
679    // The new exit block should be in the same loop as the old one.
680    if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
681      ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
682
683    assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
684           "Exit block should have been split to have one successor!");
685    BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
686
687    // If the successor of the exit block had PHI nodes, add an entry for
688    // NewExit.
689    PHINode *PN;
690    for (BasicBlock::iterator I = ExitSucc->begin();
691         (PN = dyn_cast<PHINode>(I)); ++I) {
692      Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
693      DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V);
694      if (It != ValueMap.end()) V = It->second;
695      PN->addIncoming(V, NewExit);
696    }
697  }
698
699  // Rewrite the code to refer to itself.
700  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
701    for (BasicBlock::iterator I = NewBlocks[i]->begin(),
702           E = NewBlocks[i]->end(); I != E; ++I)
703      RemapInstruction(I, ValueMap);
704
705  // Rewrite the original preheader to select between versions of the loop.
706  BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
707  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
708         "Preheader splitting did not work correctly!");
709
710  // Emit the new branch that selects between the two versions of this loop.
711  EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
712  LPM->deleteSimpleAnalysisValue(OldBR, L);
713  OldBR->eraseFromParent();
714
715  LoopProcessWorklist.push_back(NewLoop);
716  redoLoop = true;
717
718  // Now we rewrite the original code to know that the condition is true and the
719  // new code to know that the condition is false.
720  RewriteLoopBodyWithConditionConstant(L      , LIC, Val, false);
721
722  // It's possible that simplifying one loop could cause the other to be
723  // deleted.  If so, don't simplify it.
724  if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop)
725    RewriteLoopBodyWithConditionConstant(NewLoop, LIC, Val, true);
726
727}
728
729/// RemoveFromWorklist - Remove all instances of I from the worklist vector
730/// specified.
731static void RemoveFromWorklist(Instruction *I,
732                               std::vector<Instruction*> &Worklist) {
733  std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(),
734                                                     Worklist.end(), I);
735  while (WI != Worklist.end()) {
736    unsigned Offset = WI-Worklist.begin();
737    Worklist.erase(WI);
738    WI = std::find(Worklist.begin()+Offset, Worklist.end(), I);
739  }
740}
741
742/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
743/// program, replacing all uses with V and update the worklist.
744static void ReplaceUsesOfWith(Instruction *I, Value *V,
745                              std::vector<Instruction*> &Worklist,
746                              Loop *L, LPPassManager *LPM) {
747  DOUT << "Replace with '" << *V << "': " << *I;
748
749  // Add uses to the worklist, which may be dead now.
750  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
751    if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
752      Worklist.push_back(Use);
753
754  // Add users to the worklist which may be simplified now.
755  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
756       UI != E; ++UI)
757    Worklist.push_back(cast<Instruction>(*UI));
758  LPM->deleteSimpleAnalysisValue(I, L);
759  RemoveFromWorklist(I, Worklist);
760  I->replaceAllUsesWith(V);
761  I->eraseFromParent();
762  ++NumSimplify;
763}
764
765/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
766/// information, and remove any dead successors it has.
767///
768void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
769                                     std::vector<Instruction*> &Worklist,
770                                     Loop *L) {
771  if (pred_begin(BB) != pred_end(BB)) {
772    // This block isn't dead, since an edge to BB was just removed, see if there
773    // are any easy simplifications we can do now.
774    if (BasicBlock *Pred = BB->getSinglePredecessor()) {
775      // If it has one pred, fold phi nodes in BB.
776      while (isa<PHINode>(BB->begin()))
777        ReplaceUsesOfWith(BB->begin(),
778                          cast<PHINode>(BB->begin())->getIncomingValue(0),
779                          Worklist, L, LPM);
780
781      // If this is the header of a loop and the only pred is the latch, we now
782      // have an unreachable loop.
783      if (Loop *L = LI->getLoopFor(BB))
784        if (loopHeader == BB && L->contains(Pred)) {
785          // Remove the branch from the latch to the header block, this makes
786          // the header dead, which will make the latch dead (because the header
787          // dominates the latch).
788          LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
789          Pred->getTerminator()->eraseFromParent();
790          new UnreachableInst(Pred);
791
792          // The loop is now broken, remove it from LI.
793          RemoveLoopFromHierarchy(L);
794
795          // Reprocess the header, which now IS dead.
796          RemoveBlockIfDead(BB, Worklist, L);
797          return;
798        }
799
800      // If pred ends in a uncond branch, add uncond branch to worklist so that
801      // the two blocks will get merged.
802      if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
803        if (BI->isUnconditional())
804          Worklist.push_back(BI);
805    }
806    return;
807  }
808
809  DOUT << "Nuking dead block: " << *BB;
810
811  // Remove the instructions in the basic block from the worklist.
812  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
813    RemoveFromWorklist(I, Worklist);
814
815    // Anything that uses the instructions in this basic block should have their
816    // uses replaced with undefs.
817    if (!I->use_empty())
818      I->replaceAllUsesWith(UndefValue::get(I->getType()));
819  }
820
821  // If this is the edge to the header block for a loop, remove the loop and
822  // promote all subloops.
823  if (Loop *BBLoop = LI->getLoopFor(BB)) {
824    if (BBLoop->getLoopLatch() == BB)
825      RemoveLoopFromHierarchy(BBLoop);
826  }
827
828  // Remove the block from the loop info, which removes it from any loops it
829  // was in.
830  LI->removeBlock(BB);
831
832
833  // Remove phi node entries in successors for this block.
834  TerminatorInst *TI = BB->getTerminator();
835  SmallVector<BasicBlock*, 4> Succs;
836  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
837    Succs.push_back(TI->getSuccessor(i));
838    TI->getSuccessor(i)->removePredecessor(BB);
839  }
840
841  // Unique the successors, remove anything with multiple uses.
842  array_pod_sort(Succs.begin(), Succs.end());
843  Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
844
845  // Remove the basic block, including all of the instructions contained in it.
846  LPM->deleteSimpleAnalysisValue(BB, L);
847  BB->eraseFromParent();
848  // Remove successor blocks here that are not dead, so that we know we only
849  // have dead blocks in this list.  Nondead blocks have a way of becoming dead,
850  // then getting removed before we revisit them, which is badness.
851  //
852  for (unsigned i = 0; i != Succs.size(); ++i)
853    if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
854      // One exception is loop headers.  If this block was the preheader for a
855      // loop, then we DO want to visit the loop so the loop gets deleted.
856      // We know that if the successor is a loop header, that this loop had to
857      // be the preheader: the case where this was the latch block was handled
858      // above and headers can only have two predecessors.
859      if (!LI->isLoopHeader(Succs[i])) {
860        Succs.erase(Succs.begin()+i);
861        --i;
862      }
863    }
864
865  for (unsigned i = 0, e = Succs.size(); i != e; ++i)
866    RemoveBlockIfDead(Succs[i], Worklist, L);
867}
868
869/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
870/// become unwrapped, either because the backedge was deleted, or because the
871/// edge into the header was removed.  If the edge into the header from the
872/// latch block was removed, the loop is unwrapped but subloops are still alive,
873/// so they just reparent loops.  If the loops are actually dead, they will be
874/// removed later.
875void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) {
876  LPM->deleteLoopFromQueue(L);
877  RemoveLoopFromWorklist(L);
878}
879
880// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
881// the value specified by Val in the specified loop, or we know it does NOT have
882// that value.  Rewrite any uses of LIC or of properties correlated to it.
883void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
884                                                        Constant *Val,
885                                                        bool IsEqual) {
886  assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
887
888  // FIXME: Support correlated properties, like:
889  //  for (...)
890  //    if (li1 < li2)
891  //      ...
892  //    if (li1 > li2)
893  //      ...
894
895  // FOLD boolean conditions (X|LIC), (X&LIC).  Fold conditional branches,
896  // selects, switches.
897  std::vector<User*> Users(LIC->use_begin(), LIC->use_end());
898  std::vector<Instruction*> Worklist;
899
900  // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
901  // in the loop with the appropriate one directly.
902  if (IsEqual || (isa<ConstantInt>(Val) && Val->getType() == Type::Int1Ty)) {
903    Value *Replacement;
904    if (IsEqual)
905      Replacement = Val;
906    else
907      Replacement = ConstantInt::get(Type::Int1Ty,
908                                     !cast<ConstantInt>(Val)->getZExtValue());
909
910    for (unsigned i = 0, e = Users.size(); i != e; ++i)
911      if (Instruction *U = cast<Instruction>(Users[i])) {
912        if (!L->contains(U->getParent()))
913          continue;
914        U->replaceUsesOfWith(LIC, Replacement);
915        Worklist.push_back(U);
916      }
917  } else {
918    // Otherwise, we don't know the precise value of LIC, but we do know that it
919    // is certainly NOT "Val".  As such, simplify any uses in the loop that we
920    // can.  This case occurs when we unswitch switch statements.
921    for (unsigned i = 0, e = Users.size(); i != e; ++i)
922      if (Instruction *U = cast<Instruction>(Users[i])) {
923        if (!L->contains(U->getParent()))
924          continue;
925
926        Worklist.push_back(U);
927
928        // If we know that LIC is not Val, use this info to simplify code.
929        if (SwitchInst *SI = dyn_cast<SwitchInst>(U)) {
930          for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
931            if (SI->getCaseValue(i) == Val) {
932              // Found a dead case value.  Don't remove PHI nodes in the
933              // successor if they become single-entry, those PHI nodes may
934              // be in the Users list.
935
936              // FIXME: This is a hack.  We need to keep the successor around
937              // and hooked up so as to preserve the loop structure, because
938              // trying to update it is complicated.  So instead we preserve the
939              // loop structure and put the block on an dead code path.
940
941              BasicBlock *SISucc = SI->getSuccessor(i);
942              BasicBlock* Old = SI->getParent();
943              BasicBlock* Split = SplitBlock(Old, SI, this);
944
945              Instruction* OldTerm = Old->getTerminator();
946              BranchInst::Create(Split, SISucc,
947                                 ConstantInt::getTrue(), OldTerm);
948
949              LPM->deleteSimpleAnalysisValue(Old->getTerminator(), L);
950              Old->getTerminator()->eraseFromParent();
951
952              PHINode *PN;
953              for (BasicBlock::iterator II = SISucc->begin();
954                   (PN = dyn_cast<PHINode>(II)); ++II) {
955                Value *InVal = PN->removeIncomingValue(Split, false);
956                PN->addIncoming(InVal, Old);
957              }
958
959              SI->removeCase(i);
960              break;
961            }
962          }
963        }
964
965        // TODO: We could do other simplifications, for example, turning
966        // LIC == Val -> false.
967      }
968  }
969
970  SimplifyCode(Worklist, L);
971}
972
973/// SimplifyCode - Okay, now that we have simplified some instructions in the
974/// loop, walk over it and constant prop, dce, and fold control flow where
975/// possible.  Note that this is effectively a very simple loop-structure-aware
976/// optimizer.  During processing of this loop, L could very well be deleted, so
977/// it must not be used.
978///
979/// FIXME: When the loop optimizer is more mature, separate this out to a new
980/// pass.
981///
982void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
983  while (!Worklist.empty()) {
984    Instruction *I = Worklist.back();
985    Worklist.pop_back();
986
987    // Simple constant folding.
988    if (Constant *C = ConstantFoldInstruction(I)) {
989      ReplaceUsesOfWith(I, C, Worklist, L, LPM);
990      continue;
991    }
992
993    // Simple DCE.
994    if (isInstructionTriviallyDead(I)) {
995      DOUT << "Remove dead instruction '" << *I;
996
997      // Add uses to the worklist, which may be dead now.
998      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
999        if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
1000          Worklist.push_back(Use);
1001      LPM->deleteSimpleAnalysisValue(I, L);
1002      RemoveFromWorklist(I, Worklist);
1003      I->eraseFromParent();
1004      ++NumSimplify;
1005      continue;
1006    }
1007
1008    // Special case hacks that appear commonly in unswitched code.
1009    switch (I->getOpcode()) {
1010    case Instruction::Select:
1011      if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(0))) {
1012        ReplaceUsesOfWith(I, I->getOperand(!CB->getZExtValue()+1), Worklist, L,
1013                          LPM);
1014        continue;
1015      }
1016      break;
1017    case Instruction::And:
1018      if (isa<ConstantInt>(I->getOperand(0)) &&
1019          I->getOperand(0)->getType() == Type::Int1Ty)   // constant -> RHS
1020        cast<BinaryOperator>(I)->swapOperands();
1021      if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
1022        if (CB->getType() == Type::Int1Ty) {
1023          if (CB->isOne())      // X & 1 -> X
1024            ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
1025          else                  // X & 0 -> 0
1026            ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
1027          continue;
1028        }
1029      break;
1030    case Instruction::Or:
1031      if (isa<ConstantInt>(I->getOperand(0)) &&
1032          I->getOperand(0)->getType() == Type::Int1Ty)   // constant -> RHS
1033        cast<BinaryOperator>(I)->swapOperands();
1034      if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
1035        if (CB->getType() == Type::Int1Ty) {
1036          if (CB->isOne())   // X | 1 -> 1
1037            ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
1038          else                  // X | 0 -> X
1039            ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
1040          continue;
1041        }
1042      break;
1043    case Instruction::Br: {
1044      BranchInst *BI = cast<BranchInst>(I);
1045      if (BI->isUnconditional()) {
1046        // If BI's parent is the only pred of the successor, fold the two blocks
1047        // together.
1048        BasicBlock *Pred = BI->getParent();
1049        BasicBlock *Succ = BI->getSuccessor(0);
1050        BasicBlock *SinglePred = Succ->getSinglePredecessor();
1051        if (!SinglePred) continue;  // Nothing to do.
1052        assert(SinglePred == Pred && "CFG broken");
1053
1054        DOUT << "Merging blocks: " << Pred->getName() << " <- "
1055             << Succ->getName() << "\n";
1056
1057        // Resolve any single entry PHI nodes in Succ.
1058        while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
1059          ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
1060
1061        // Move all of the successor contents from Succ to Pred.
1062        Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
1063                                   Succ->end());
1064        LPM->deleteSimpleAnalysisValue(BI, L);
1065        BI->eraseFromParent();
1066        RemoveFromWorklist(BI, Worklist);
1067
1068        // If Succ has any successors with PHI nodes, update them to have
1069        // entries coming from Pred instead of Succ.
1070        Succ->replaceAllUsesWith(Pred);
1071
1072        // Remove Succ from the loop tree.
1073        LI->removeBlock(Succ);
1074        LPM->deleteSimpleAnalysisValue(Succ, L);
1075        Succ->eraseFromParent();
1076        ++NumSimplify;
1077      } else if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
1078        // Conditional branch.  Turn it into an unconditional branch, then
1079        // remove dead blocks.
1080        break;  // FIXME: Enable.
1081
1082        DOUT << "Folded branch: " << *BI;
1083        BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
1084        BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
1085        DeadSucc->removePredecessor(BI->getParent(), true);
1086        Worklist.push_back(BranchInst::Create(LiveSucc, BI));
1087        LPM->deleteSimpleAnalysisValue(BI, L);
1088        BI->eraseFromParent();
1089        RemoveFromWorklist(BI, Worklist);
1090        ++NumSimplify;
1091
1092        RemoveBlockIfDead(DeadSucc, Worklist, L);
1093      }
1094      break;
1095    }
1096    }
1097  }
1098}
1099