1c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
2c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel//
3c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel//                     The LLVM Compiler Infrastructure
4c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel//
54ee451de366474b9c228b4e5fa573795a715216dChris Lattner// This file is distributed under the University of Illinois Open Source
64ee451de366474b9c228b4e5fa573795a715216dChris Lattner// License. See LICENSE.TXT for details.
7c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel//
8c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel//===----------------------------------------------------------------------===//
9c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel//
10c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel// This file implements Loop Rotation Pass.
11c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel//
12c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel//===----------------------------------------------------------------------===//
13c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
14c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel#include "llvm/Transforms/Scalar.h"
15d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/ADT/Statistic.h"
16d9e079706e782c5f054322a0d15f063a1a683230Chris Lattner#include "llvm/Analysis/CodeMetrics.h"
17d9ec3572f3d0997348361334446f942194522127Chris Lattner#include "llvm/Analysis/InstructionSimplify.h"
18d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Analysis/LoopPass.h"
19990e866deb158fd88377b5606c8d86603e93e533Devang Patel#include "llvm/Analysis/ScalarEvolution.h"
20a5157e68d183e1bdf010e94a15dc0c44b65f889bChandler Carruth#include "llvm/Analysis/TargetTransformInfo.h"
21f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick#include "llvm/Analysis/ValueTracking.h"
2236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/CFG.h"
2336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Dominators.h"
240b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Function.h"
250b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IntrinsicInst.h"
26dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/Support/CommandLine.h"
27d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/Debug.h"
28990e866deb158fd88377b5606c8d86603e93e533Devang Patel#include "llvm/Transforms/Utils/BasicBlockUtils.h"
29d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Transforms/Utils/Local.h"
30e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman#include "llvm/Transforms/Utils/SSAUpdater.h"
316ccb3652933bcdede2b2a53cf76ddd56ce9592a2Chris Lattner#include "llvm/Transforms/Utils/ValueMapper.h"
32c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patelusing namespace llvm;
33c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
34dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "loop-rotate"
35dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
36dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesstatic cl::opt<unsigned>
37dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesDefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
38dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines       cl::desc("The default maximum header size for automatic loop rotation"));
39c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
40c4625da483ff9e835aef886864e37dd68fb7a03cDevang PatelSTATISTIC(NumRotated, "Number of loops rotated");
41c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patelnamespace {
42c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
433e8b6631e67e01e4960a7ba4668a50c596607473Chris Lattner  class LoopRotate : public LoopPass {
44c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel  public:
451997473cf72957d0e70322e2fe6fe2ab141c58a6Devang Patel    static char ID; // Pass ID, replacement for typeid
46dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
47081c34b725980f995be9080eaec24cd3dfaaf065Owen Anderson      initializeLoopRotatePass(*PassRegistry::getPassRegistry());
48dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (SpecifiedMaxHeaderSize == -1)
49dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        MaxHeaderSize = DefaultRotationThreshold;
50dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      else
51dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
52081c34b725980f995be9080eaec24cd3dfaaf065Owen Anderson    }
53794fd75c67a2cdc128d67342c6d88a504d186896Devang Patel
54322313376a9ccac86cbfc208684d84936d62322dDevang Patel    // LCSSA form makes instruction renaming easier.
5536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    void getAnalysisUsage(AnalysisUsage &AU) const override {
5636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      AU.addPreserved<DominatorTreeWrapperPass>();
571e381fcd553a3955a10338fd305efc023d7d22e1Dan Gohman      AU.addRequired<LoopInfo>();
581e381fcd553a3955a10338fd305efc023d7d22e1Dan Gohman      AU.addPreserved<LoopInfo>();
599d9b204d6a155f4778a9009c643eed5bf59148e2Devang Patel      AU.addRequiredID(LoopSimplifyID);
609d9b204d6a155f4778a9009c643eed5bf59148e2Devang Patel      AU.addPreservedID(LoopSimplifyID);
61c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel      AU.addRequiredID(LCSSAID);
62c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel      AU.addPreservedID(LCSSAID);
63990e866deb158fd88377b5606c8d86603e93e533Devang Patel      AU.addPreserved<ScalarEvolution>();
64a5157e68d183e1bdf010e94a15dc0c44b65f889bChandler Carruth      AU.addRequired<TargetTransformInfo>();
65c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel    }
66c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
6736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
68fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick    bool simplifyLoopLatch(Loop *L);
69fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick    bool rotateLoop(Loop *L, bool SimplifiedLatch);
70c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
71c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel  private:
72dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    unsigned MaxHeaderSize;
73012ca949afc1e78c06dd49b8bc5f3b6f6c45458dChris Lattner    LoopInfo *LI;
74a5157e68d183e1bdf010e94a15dc0c44b65f889bChandler Carruth    const TargetTransformInfo *TTI;
75c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel  };
76c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel}
77c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
78844731a7f1909f55935e3514c9e713a62d67662eDan Gohmanchar LoopRotate::ID = 0;
792ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
80a5157e68d183e1bdf010e94a15dc0c44b65f889bChandler CarruthINITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
812ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_DEPENDENCY(LoopInfo)
822ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_DEPENDENCY(LoopSimplify)
832ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_DEPENDENCY(LCSSA)
842ab36d350293c77fc8941ce1023e4899df7e3a82Owen AndersonINITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
85c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
86dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesPass *llvm::createLoopRotatePass(int MaxHeaderSize) {
87dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return new LoopRotate(MaxHeaderSize);
88dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
89c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
90322313376a9ccac86cbfc208684d84936d62322dDevang Patel/// Rotate Loop L as many times as possible. Return true if
91cc4e605721e1144da8990edca64bf5799220e279Dan Gohman/// the loop is rotated at least once.
924aefc9bf1b963f2fe42f2de3341cd1eb5a9a0ce7Chris Lattnerbool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
9336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (skipOptnoneFunction(L))
9436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return false;
9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
96dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Save the loop metadata.
97dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  MDNode *LoopMD = L->getLoopID();
98dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
99012ca949afc1e78c06dd49b8bc5f3b6f6c45458dChris Lattner  LI = &getAnalysis<LoopInfo>();
100a5157e68d183e1bdf010e94a15dc0c44b65f889bChandler Carruth  TTI = &getAnalysis<TargetTransformInfo>();
101990e866deb158fd88377b5606c8d86603e93e533Devang Patel
102f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  // Simplify the loop latch before attempting to rotate the header
103f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  // upward. Rotation may not be needed if the loop tail can be folded into the
104f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  // loop exit.
105fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick  bool SimplifiedLatch = simplifyLoopLatch(L);
106f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
107c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel  // One loop can be rotated multiple times.
108012ca949afc1e78c06dd49b8bc5f3b6f6c45458dChris Lattner  bool MadeChange = false;
109fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick  while (rotateLoop(L, SimplifiedLatch)) {
110012ca949afc1e78c06dd49b8bc5f3b6f6c45458dChris Lattner    MadeChange = true;
111fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick    SimplifiedLatch = false;
112fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick  }
113dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
114dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Restore the loop metadata.
115dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // NB! We presume LoopRotation DOESN'T ADD its own metadata.
116dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if ((MadeChange || SimplifiedLatch) && LoopMD)
117dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    L->setLoopID(LoopMD);
118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
119012ca949afc1e78c06dd49b8bc5f3b6f6c45458dChris Lattner  return MadeChange;
120c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel}
121c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
12264c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
12364c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner/// old header into the preheader.  If there were uses of the values produced by
12464c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner/// these instruction that were outside of the loop, we have to insert PHI nodes
12564c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner/// to merge the two values.  Do this now.
12664c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattnerstatic void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
12764c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner                                            BasicBlock *OrigPreheader,
12864c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner                                            ValueToValueMapTy &ValueMap) {
12964c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  // Remove PHI node entries that are no longer live.
13064c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  BasicBlock::iterator I, E = OrigHeader->end();
13164c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
13264c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
133c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
13464c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
13564c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  // as necessary.
13664c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  SSAUpdater SSA;
13764c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  for (I = OrigHeader->begin(); I != E; ++I) {
13864c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    Value *OrigHeaderVal = I;
139c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
14064c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    // If there are no uses of the value (e.g. because it returns void), there
14164c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    // is nothing to rewrite.
14264c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    if (OrigHeaderVal->use_empty())
14364c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      continue;
144c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
14564c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
14664c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner
14764c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    // The value now exits in two versions: the initial value in the preheader
14864c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    // and the loop "next" value in the original header.
14964c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
15064c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
15164c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
152c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
15364c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    // Visit each use of the OrigHeader instruction.
15464c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
15564c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner         UE = OrigHeaderVal->use_end(); UI != UE; ) {
15664c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      // Grab the use before incrementing the iterator.
15736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Use &U = *UI;
158c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
15964c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      // Increment the iterator before removing the use from the list.
16064c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      ++UI;
161c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
16264c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      // SSAUpdater can't handle a non-PHI use in the same block as an
16364c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      // earlier def. We can easily handle those cases manually.
16464c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      Instruction *UserInst = cast<Instruction>(U.getUser());
16564c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      if (!isa<PHINode>(UserInst)) {
16664c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner        BasicBlock *UserBB = UserInst->getParent();
167c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
16864c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner        // The original users in the OrigHeader are already using the
16964c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner        // original definitions.
17064c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner        if (UserBB == OrigHeader)
17164c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner          continue;
172c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
17364c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner        // Users in the OrigPreHeader need to use the value to which the
17464c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner        // original definitions are mapped.
17564c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner        if (UserBB == OrigPreheader) {
17664c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner          U = OrigPreHeaderVal;
17764c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner          continue;
17864c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner        }
17964c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      }
180c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
18164c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      // Anything else can be handled by SSAUpdater.
18264c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      SSA.RewriteUse(U);
18364c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner    }
18464c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  }
185c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick}
18664c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner
187f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick/// Determine whether the instructions in this range my be safely and cheaply
188f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick/// speculated. This is not an important enough situation to develop complex
189f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick/// heuristics. We handle a single arithmetic instruction along with any type
190f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick/// conversions.
191f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trickstatic bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
192f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick                                  BasicBlock::iterator End) {
193f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  bool seenIncrement = false;
194f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  for (BasicBlock::iterator I = Begin; I != End; ++I) {
195f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
196f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    if (!isSafeToSpeculativelyExecute(I))
197f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick      return false;
198f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
199f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    if (isa<DbgInfoIntrinsic>(I))
200f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick      continue;
201f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
202f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    switch (I->getOpcode()) {
203f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    default:
204f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick      return false;
205f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::GetElementPtr:
206f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick      // GEPs are cheap if all indices are constant.
207f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick      if (!cast<GEPOperator>(I)->hasAllConstantIndices())
208f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick        return false;
209f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick      // fall-thru to increment case
210f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::Add:
211f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::Sub:
212f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::And:
213f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::Or:
214f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::Xor:
215f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::Shl:
216f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::LShr:
217f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::AShr:
218f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick      if (seenIncrement)
219f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick        return false;
220f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick      seenIncrement = true;
221f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick      break;
222f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::Trunc:
223f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::ZExt:
224f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    case Instruction::SExt:
225f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick      // ignore type conversions
226f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick      break;
227f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick    }
228f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  }
229f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  return true;
230f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick}
231f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
232f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick/// Fold the loop tail into the loop exit by speculating the loop tail
233f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick/// instructions. Typically, this is a single post-increment. In the case of a
234f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick/// simple 2-block loop, hoisting the increment can be much better than
235f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick/// duplicating the entire loop header. In the cast of loops with early exits,
236f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
237f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick/// canonical form so downstream passes can handle it.
238f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick///
239f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick/// I don't believe this invalidates SCEV.
240fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trickbool LoopRotate::simplifyLoopLatch(Loop *L) {
241f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  BasicBlock *Latch = L->getLoopLatch();
242f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  if (!Latch || Latch->hasAddressTaken())
243fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick    return false;
244f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
245f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
246f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  if (!Jmp || !Jmp->isUnconditional())
247fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick    return false;
248f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
249f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  BasicBlock *LastExit = Latch->getSinglePredecessor();
250f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  if (!LastExit || !L->isLoopExiting(LastExit))
251fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick    return false;
252f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
253f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
254f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  if (!BI)
255fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick    return false;
256f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
257f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
258fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick    return false;
259f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
260f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
261f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick        << LastExit->getName() << "\n");
262f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
263f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  // Hoist the instructions from Latch into LastExit.
264f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp);
265f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
266f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
267f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  BasicBlock *Header = Jmp->getSuccessor(0);
268f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  assert(Header == L->getHeader() && "expected a backward branch");
269f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
270f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  // Remove Latch from the CFG so that LastExit becomes the new Latch.
271f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  BI->setSuccessor(FallThruPath, Header);
272f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  Latch->replaceSuccessorsPhiUsesWith(LastExit);
273f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  Jmp->eraseFromParent();
274f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
275f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  // Nuke the Latch block.
276f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  assert(Latch->empty() && "unable to evacuate Latch");
277f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  LI->removeBlock(Latch);
27836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (DominatorTreeWrapperPass *DTWP =
27936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          getAnalysisIfAvailable<DominatorTreeWrapperPass>())
28036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    DTWP->getDomTree().eraseNode(Latch);
281f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick  Latch->eraseFromParent();
282fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick  return true;
283f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick}
284f6629ab84785ff5e7db4cbb9e2d1cbd4ab3b5d34Andrew Trick
28523d9d27c265753da55a8ee7879820acb4d1e3a6dDan Gohman/// Rotate loop LP. Return true if the loop is rotated.
286fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick///
287fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick/// \param SimplifiedLatch is true if the latch was just folded into the final
288fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick/// loop exit. In this case we may want to rotate even though the new latch is
289fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick/// now an exiting branch. This rotation would have happened had the latch not
290fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick/// been simplified. However, if SimplifiedLatch is false, then we avoid
291fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick/// rotating loops in which the latch exits to avoid excessive or endless
292fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick/// rotation. LoopRotate should be repeatable and converge to a canonical
293fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick/// form. This property is satisfied because simplifying the loop latch can only
294fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick/// happen once across multiple invocations of the LoopRotate pass.
295fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trickbool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
296cc4e605721e1144da8990edca64bf5799220e279Dan Gohman  // If the loop has only one block then there is not much to rotate.
297322313376a9ccac86cbfc208684d84936d62322dDevang Patel  if (L->getBlocks().size() == 1)
298c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel    return false;
299c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
3002aa69082310f48162cb732efdc41613391796cd7Chris Lattner  BasicBlock *OrigHeader = L->getHeader();
301d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer  BasicBlock *OrigLatch = L->getLoopLatch();
302c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
3032aa69082310f48162cb732efdc41613391796cd7Chris Lattner  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
304dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!BI || BI->isUnconditional())
3052aa69082310f48162cb732efdc41613391796cd7Chris Lattner    return false;
306c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
307cc4e605721e1144da8990edca64bf5799220e279Dan Gohman  // If the loop header is not one of the loop exiting blocks then
308cc4e605721e1144da8990edca64bf5799220e279Dan Gohman  // either this loop is already rotated or it is not
309c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel  // suitable for loop rotation transformations.
31032663b719b4996b3a735f22bba80d771d50f96e7Dan Gohman  if (!L->isLoopExiting(OrigHeader))
311c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel    return false;
312c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
313d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer  // If the loop latch already contains a branch that leaves the loop then the
314d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer  // loop is already rotated.
315dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!OrigLatch)
316fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick    return false;
317fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick
318fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick  // Rotate if either the loop latch does *not* exit the loop, or if the loop
319fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick  // latch was just simplified.
320fcf79528da4da87996a936b9e86f669c1f0c1dc6Andrew Trick  if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
321c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel    return false;
322c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
32367ae13575900e8efd056672987249fd0adbf5e73James Molloy  // Check size of original header and reject loop if it is very big or we can't
32467ae13575900e8efd056672987249fd0adbf5e73James Molloy  // duplicate blocks inside it.
325d9e079706e782c5f054322a0d15f063a1a683230Chris Lattner  {
326d9e079706e782c5f054322a0d15f063a1a683230Chris Lattner    CodeMetrics Metrics;
327a5157e68d183e1bdf010e94a15dc0c44b65f889bChandler Carruth    Metrics.analyzeBasicBlock(OrigHeader, *TTI);
32867ae13575900e8efd056672987249fd0adbf5e73James Molloy    if (Metrics.notDuplicatable) {
32936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
33067ae13575900e8efd056672987249fd0adbf5e73James Molloy            << " instructions: "; L->dump());
33167ae13575900e8efd056672987249fd0adbf5e73James Molloy      return false;
33267ae13575900e8efd056672987249fd0adbf5e73James Molloy    }
333dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (Metrics.NumInsts > MaxHeaderSize)
334d9e079706e782c5f054322a0d15f063a1a683230Chris Lattner      return false;
3353f43a7021fae71c056f5e1afc60016cfd8193f68Devang Patel  }
3363f43a7021fae71c056f5e1afc60016cfd8193f68Devang Patel
337990e866deb158fd88377b5606c8d86603e93e533Devang Patel  // Now, this loop is suitable for rotation.
33864c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  BasicBlock *OrigPreheader = L->getLoopPreheader();
339c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
340f5bf464b701908d16f7cee0bbf2b8c8df4f3a917Chris Lattner  // If the loop could not be converted to canonical form, it must have an
341f5bf464b701908d16f7cee0bbf2b8c8df4f3a917Chris Lattner  // indirectbr in it, just give up.
342dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!OrigPreheader)
343f5bf464b701908d16f7cee0bbf2b8c8df4f3a917Chris Lattner    return false;
344990e866deb158fd88377b5606c8d86603e93e533Devang Patel
345e6fe67b2fb48b1f0f3c1ac8c1ce01e67c1f0bb1dDan Gohman  // Anything ScalarEvolution may know about this loop or the PHI nodes
346e6fe67b2fb48b1f0f3c1ac8c1ce01e67c1f0bb1dDan Gohman  // in its header will soon be invalidated.
347e6fe67b2fb48b1f0f3c1ac8c1ce01e67c1f0bb1dDan Gohman  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
3484c7279ac726e338400626fca5a09b5533426eb6aDan Gohman    SE->forgetLoop(L);
349e6fe67b2fb48b1f0f3c1ac8c1ce01e67c1f0bb1dDan Gohman
350d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer  DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
351d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer
352c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel  // Find new Loop header. NewHeader is a Header's one and only successor
353f6784a326293941cd4ff6d25da64732760e993f3Chris Lattner  // that is inside loop.  Header's other successor is outside the
354f6784a326293941cd4ff6d25da64732760e993f3Chris Lattner  // loop.  Otherwise loop is not suitable for rotation.
3554aefc9bf1b963f2fe42f2de3341cd1eb5a9a0ce7Chris Lattner  BasicBlock *Exit = BI->getSuccessor(0);
3564aefc9bf1b963f2fe42f2de3341cd1eb5a9a0ce7Chris Lattner  BasicBlock *NewHeader = BI->getSuccessor(1);
357322313376a9ccac86cbfc208684d84936d62322dDevang Patel  if (L->contains(Exit))
358322313376a9ccac86cbfc208684d84936d62322dDevang Patel    std::swap(Exit, NewHeader);
3592ba2543df26e15b67d27a449eb58429d62465200Chris Lattner  assert(NewHeader && "Unable to determine new loop header");
360c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick  assert(L->contains(NewHeader) && !L->contains(Exit) &&
361322313376a9ccac86cbfc208684d84936d62322dDevang Patel         "Unable to determine loop header and exit blocks");
362c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
363cc4e605721e1144da8990edca64bf5799220e279Dan Gohman  // This code assumes that the new header has exactly one predecessor.
364cc4e605721e1144da8990edca64bf5799220e279Dan Gohman  // Remove any single-entry PHI nodes in it.
3653796a262c50e0b04c1a5a9571f12bb9bc4936c25Chris Lattner  assert(NewHeader->getSinglePredecessor() &&
3663796a262c50e0b04c1a5a9571f12bb9bc4936c25Chris Lattner         "New header doesn't have one pred!");
3673796a262c50e0b04c1a5a9571f12bb9bc4936c25Chris Lattner  FoldSingleEntryPHINodes(NewHeader);
368c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
369e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  // Begin by walking OrigHeader and populating ValueMap with an entry for
370e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  // each Instruction.
371322313376a9ccac86cbfc208684d84936d62322dDevang Patel  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
3726ccb3652933bcdede2b2a53cf76ddd56ce9592a2Chris Lattner  ValueToValueMapTy ValueMap;
373322313376a9ccac86cbfc208684d84936d62322dDevang Patel
374e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  // For PHI nodes, the value available in OldPreHeader is just the
375e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  // incoming value from OldPreHeader.
376e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
377c137120bb047a7017cbab21f5f9c9e6f65e2b84fJay Foad    ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
378e98815469ccd2f1f1585abdc7c36042177bc26f0Devang Patel
37950fb46983ccae116bdbda64471f4861108766135Chris Lattner  // For the rest of the instructions, either hoist to the OrigPreheader if
38050fb46983ccae116bdbda64471f4861108766135Chris Lattner  // possible or create a clone in the OldPreHeader if not.
38164c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
38250fb46983ccae116bdbda64471f4861108766135Chris Lattner  while (I != E) {
38350fb46983ccae116bdbda64471f4861108766135Chris Lattner    Instruction *Inst = I++;
384c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
38550fb46983ccae116bdbda64471f4861108766135Chris Lattner    // If the instruction's operands are invariant and it doesn't read or write
38650fb46983ccae116bdbda64471f4861108766135Chris Lattner    // memory, then it is safe to hoist.  Doing this doesn't change the order of
38750fb46983ccae116bdbda64471f4861108766135Chris Lattner    // execution in the preheader, but does prevent the instruction from
38850fb46983ccae116bdbda64471f4861108766135Chris Lattner    // executing in each iteration of the loop.  This means it is safe to hoist
38950fb46983ccae116bdbda64471f4861108766135Chris Lattner    // something that might trap, but isn't safe to hoist something that reads
39050fb46983ccae116bdbda64471f4861108766135Chris Lattner    // memory (without proving that the loop doesn't write).
39150fb46983ccae116bdbda64471f4861108766135Chris Lattner    if (L->hasLoopInvariantOperands(Inst) &&
39250fb46983ccae116bdbda64471f4861108766135Chris Lattner        !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
3935e6162e75645122b6afdbca8ba55294e073dc369Eli Friedman        !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst) &&
3945e6162e75645122b6afdbca8ba55294e073dc369Eli Friedman        !isa<AllocaInst>(Inst)) {
39550fb46983ccae116bdbda64471f4861108766135Chris Lattner      Inst->moveBefore(LoopEntryBranch);
39650fb46983ccae116bdbda64471f4861108766135Chris Lattner      continue;
39750fb46983ccae116bdbda64471f4861108766135Chris Lattner    }
398c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
39950fb46983ccae116bdbda64471f4861108766135Chris Lattner    // Otherwise, create a duplicate of the instruction.
40050fb46983ccae116bdbda64471f4861108766135Chris Lattner    Instruction *C = Inst->clone();
401c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
402d9ec3572f3d0997348361334446f942194522127Chris Lattner    // Eagerly remap the operands of the instruction.
403d9ec3572f3d0997348361334446f942194522127Chris Lattner    RemapInstruction(C, ValueMap,
404d9ec3572f3d0997348361334446f942194522127Chris Lattner                     RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
405c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
406d9ec3572f3d0997348361334446f942194522127Chris Lattner    // With the operands remapped, see if the instruction constant folds or is
407d9ec3572f3d0997348361334446f942194522127Chris Lattner    // otherwise simplifyable.  This commonly occurs because the entry from PHI
408d9ec3572f3d0997348361334446f942194522127Chris Lattner    // nodes allows icmps and other instructions to fold.
409012ca949afc1e78c06dd49b8bc5f3b6f6c45458dChris Lattner    Value *V = SimplifyInstruction(C);
410012ca949afc1e78c06dd49b8bc5f3b6f6c45458dChris Lattner    if (V && LI->replacementPreservesLCSSAForm(C, V)) {
411d9ec3572f3d0997348361334446f942194522127Chris Lattner      // If so, then delete the temporary instruction and stick the folded value
412d9ec3572f3d0997348361334446f942194522127Chris Lattner      // in the map.
413d9ec3572f3d0997348361334446f942194522127Chris Lattner      delete C;
414d9ec3572f3d0997348361334446f942194522127Chris Lattner      ValueMap[Inst] = V;
415d9ec3572f3d0997348361334446f942194522127Chris Lattner    } else {
416d9ec3572f3d0997348361334446f942194522127Chris Lattner      // Otherwise, stick the new instruction into the new block!
417d9ec3572f3d0997348361334446f942194522127Chris Lattner      C->setName(Inst->getName());
418d9ec3572f3d0997348361334446f942194522127Chris Lattner      C->insertBefore(LoopEntryBranch);
419d9ec3572f3d0997348361334446f942194522127Chris Lattner      ValueMap[Inst] = C;
420d9ec3572f3d0997348361334446f942194522127Chris Lattner    }
421c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel  }
422c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
423e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  // Along with all the other instructions, we just cloned OrigHeader's
424e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
425e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  // successors by duplicating their incoming values for OrigHeader.
426e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  TerminatorInst *TI = OrigHeader->getTerminator();
427e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
428e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman    for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
429e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman         PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
43064c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
431e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman
432e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
433e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  // OrigPreHeader's old terminator (the original branch into the loop), and
434e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  // remove the corresponding incoming values from the PHI nodes in OrigHeader.
435e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  LoopEntryBranch->eraseFromParent();
436e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman
43764c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  // If there were any uses of instructions in the duplicated block outside the
43864c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  // loop, update them, inserting PHI nodes as required
43964c24db959815c6b7edbebd32e5a16936d75b2e1Chris Lattner  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
440c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
441e6e37b94e86acfa33e687a6284079d0adb0c8c4aDan Gohman  // NewHeader is now the header of the loop.
442c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel  L->moveToHeader(NewHeader);
443883401a72f03286cb51d08bf1c894c7d7eb58a31Chris Lattner  assert(L->getHeader() == NewHeader && "Latch block is our new header");
444c4625da483ff9e835aef886864e37dd68fb7a03cDevang Patel
445c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
4465d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  // At this point, we've finished our major CFG changes.  As part of cloning
4475d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  // the loop into the preheader we've simplified instructions and the
4485d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  // duplicated conditional branch may now be branching on a constant.  If it is
4495d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  // branching on a constant and if that constant means that we enter the loop,
4505d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  // then we fold away the cond branch to an uncond branch.  This simplifies the
4515d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  // loop in cases important for nested loops, and it also means we don't have
4525d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  // to split as many edges.
4535d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
4545d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  assert(PHBI->isConditional() && "Should be clone of BI condbr!");
4555d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  if (!isa<ConstantInt>(PHBI->getCondition()) ||
4565d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner      PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
4575d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner          != NewHeader) {
4585d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    // The conditional branch can't be folded, handle the general case.
4595d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    // Update DominatorTree to reflect the CFG change we just made.  Then split
4605d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    // edges as necessary to preserve LoopSimplify form.
46136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (DominatorTreeWrapperPass *DTWP =
46236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
46336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DominatorTree &DT = DTWP->getDomTree();
464d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer      // Everything that was dominated by the old loop header is now dominated
465d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer      // by the original loop preheader. Conceptually the header was merged
466d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer      // into the preheader, even though we reuse the actual block as a new
467d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer      // loop latch.
46836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
469d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer      SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
470d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer                                                   OrigHeaderNode->end());
47136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DomTreeNode *OrigPreheaderNode = DT.getNode(OrigPreheader);
472d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer      for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
47336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        DT.changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
474c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
47536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      assert(DT.getNode(Exit)->getIDom() == OrigPreheaderNode);
47636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      assert(DT.getNode(NewHeader)->getIDom() == OrigPreheaderNode);
47764f30e3eedc08a9387baef2d2e24d62d85b53f26Benjamin Kramer
4785d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner      // Update OrigHeader to be dominated by the new header block.
47936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DT.changeImmediateDominator(OrigHeader, OrigLatch);
4805d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    }
481c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
4825d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
483a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem    // thus is not a preheader anymore.
484a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem    // Split the edge to form a real preheader.
4855d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
4865d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    NewPH->setName(NewHeader->getName() + ".lr.ph");
487c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
488a94d6e87c4c49f2e81b01d66d8bfb591277f8f96Nadav Rotem    // Preserve canonical loop form, which means that 'Exit' should have only
48936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // one predecessor. Note that Exit could be an exit block for multiple
49036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // nested loops, causing both of the edges to now be critical and need to
49136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // be split.
49236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
49336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    bool SplitLatchEdge = false;
49436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    for (SmallVectorImpl<BasicBlock *>::iterator PI = ExitPreds.begin(),
49536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                                 PE = ExitPreds.end();
49636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines         PI != PE; ++PI) {
49736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // We only need to split loop exit edges.
49836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Loop *PredLoop = LI->getLoopFor(*PI);
49936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (!PredLoop || PredLoop->contains(Exit))
50036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        continue;
50136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      SplitLatchEdge |= L->getLoopLatch() == *PI;
50236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      BasicBlock *ExitSplit = SplitCriticalEdge(*PI, Exit, this);
50336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      ExitSplit->moveBefore(Exit);
50436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
50536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    assert(SplitLatchEdge &&
50636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines           "Despite splitting all preds, failed to split latch exit?");
5075d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  } else {
5085d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    // We can fold the conditional branch in the preheader, this makes things
5095d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    // simpler. The first step is to remove the extra edge to the Exit block.
5105d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
511bd5426a4f263eca7893fc54775d6cd86e729e100Devang Patel    BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
512bd5426a4f263eca7893fc54775d6cd86e729e100Devang Patel    NewBI->setDebugLoc(PHBI->getDebugLoc());
5135d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    PHBI->eraseFromParent();
514c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
5155d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    // With our CFG finalized, update DomTree if it is available.
51636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (DominatorTreeWrapperPass *DTWP =
51736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
51836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DominatorTree &DT = DTWP->getDomTree();
5195d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner      // Update OrigHeader to be dominated by the new header block.
52036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DT.changeImmediateDominator(NewHeader, OrigPreheader);
52136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DT.changeImmediateDominator(OrigHeader, OrigLatch);
522d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer
523d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer      // Brute force incremental dominator tree update. Call
524d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer      // findNearestCommonDominator on all CFG predecessors of each child of the
525d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer      // original header.
52636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
5277de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer      SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
5287de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer                                                   OrigHeaderNode->end());
5297de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer      bool Changed;
5307de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer      do {
5317de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer        Changed = false;
5327de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer        for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) {
5337de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer          DomTreeNode *Node = HeaderChildren[I];
5347de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer          BasicBlock *BB = Node->getBlock();
5357de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer
5367de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer          pred_iterator PI = pred_begin(BB);
5377de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer          BasicBlock *NearestDom = *PI;
5387de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer          for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
53936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            NearestDom = DT.findNearestCommonDominator(NearestDom, *PI);
5407de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer
5417de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer          // Remember if this changes the DomTree.
5427de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer          if (Node->getIDom()->getBlock() != NearestDom) {
54336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            DT.changeImmediateDominator(BB, NearestDom);
5447de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer            Changed = true;
545d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer          }
546d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer        }
547d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer
5487de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer      // If the dominator changed, this may have an effect on other
5497de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer      // predecessors, continue until we reach a fixpoint.
5507de7078933292b0487f1f39f539bece922e3dde5Benjamin Kramer      } while (Changed);
5515d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner    }
552990e866deb158fd88377b5606c8d86603e93e533Devang Patel  }
553c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
5545d37370a6ff255293d0b97abf9e8b3d46ed17238Chris Lattner  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
5550e4a1543aba1364fe4a1eacee46b6d08d4611506Chris Lattner  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
556a1ae0c74affb3672e75c6d9984dc092886f48454Chris Lattner
55793767fdb61dfff6ab13056b2a73209998ae4e786Chris Lattner  // Now that the CFG and DomTree are in a consistent state again, try to merge
55893767fdb61dfff6ab13056b2a73209998ae4e786Chris Lattner  // the OrigHeader block into OrigLatch.  This will succeed if they are
55993767fdb61dfff6ab13056b2a73209998ae4e786Chris Lattner  // connected by an unconditional branch.  This is just a cleanup so the
56093767fdb61dfff6ab13056b2a73209998ae4e786Chris Lattner  // emitted code isn't too gross in this common case.
56193767fdb61dfff6ab13056b2a73209998ae4e786Chris Lattner  MergeBlockIntoPredecessor(OrigHeader, this);
562c3a825b76cbaea3dcde02e9fa53d8f2d1311fe43Andrew Trick
563d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer  DEBUG(dbgs() << "LoopRotation: into "; L->dump());
564d70846ec1b79162d519620ddf07bcb7a64b65eb5Benjamin Kramer
565a1ae0c74affb3672e75c6d9984dc092886f48454Chris Lattner  ++NumRotated;
566a1ae0c74affb3672e75c6d9984dc092886f48454Chris Lattner  return true;
5675464b96073626f811d79d56fa37be230552d2264Devang Patel}
568