15d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
25d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//
35d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//                     The LLVM Compiler Infrastructure
45d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//
55d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// This file is distributed under the University of Illinois Open Source
65d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// License. See LICENSE.TXT for details.
75d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//
85d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//===----------------------------------------------------------------------===//
95d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//
105d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// This file implements some loop unrolling utilities for loops with run-time
115d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// trip counts.  See LoopUnroll.cpp for unrolling loops with compile-time
125d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// trip counts.
135d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//
1453ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak// The functions in this file are used to generate extra code when the
155d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// run-time trip count modulo the unroll factor is not 0.  When this is the
165d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// case, we need to generate code to execute these 'left over' iterations.
175d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//
1853ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak// The current strategy generates an if-then-else sequence prior to the
195d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// unrolled loop to execute the 'left over' iterations.  Other strategies
205d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// include generate a loop before or after the unrolled loop.
215d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//
225d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//===----------------------------------------------------------------------===//
235d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
245d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#define DEBUG_TYPE "loop-unroll"
255d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Transforms/Utils/UnrollLoop.h"
265d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/BasicBlock.h"
275d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/ADT/Statistic.h"
285d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Analysis/LoopIterator.h"
295d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Analysis/LoopPass.h"
305d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Analysis/ScalarEvolution.h"
315d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Analysis/ScalarEvolutionExpander.h"
325d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Support/Debug.h"
335d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Support/raw_ostream.h"
345d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Transforms/Utils/BasicBlockUtils.h"
355d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Transforms/Utils/Cloning.h"
365d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include <algorithm>
375d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
385d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trickusing namespace llvm;
395d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
4053ce4286469b7a565674dc253d9e69b3cfeb7054Jakub StaszakSTATISTIC(NumRuntimeUnrolled,
415d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick          "Number of loops unrolled with run-time trip counts");
425d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
435d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// Connect the unrolling prolog code to the original loop.
445d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// The unrolling prolog code contains code to execute the
455d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// 'extra' iterations if the run-time trip count modulo the
465d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// unroll count is non-zero.
475d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///
485d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// This function performs the following:
495d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// - Create PHI nodes at prolog end block to combine values
505d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///   that exit the prolog code and jump around the prolog.
515d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// - Add a PHI operand to a PHI node at the loop exit block
525d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///   for values that exit the prolog and go around the loop.
535d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// - Branch around the original loop if the trip count is less
545d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///   than the unroll factor.
555d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///
565d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trickstatic void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
575d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                          BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
585d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                          BasicBlock *OrigPH, BasicBlock *NewPH,
595d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                          ValueToValueMapTy &LVMap, Pass *P) {
605d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *Latch = L->getLoopLatch();
615d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  assert(Latch != 0 && "Loop must have a latch");
625d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
635d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Create a PHI node for each outgoing value from the original loop
645d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // (which means it is an outgoing value from the prolog code too).
655d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // The new PHI node is inserted in the prolog end basic block.
665d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // The new PHI name is added as an operand of a PHI node in either
675d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // the loop header or the loop exit block.
685d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
695d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick       SBI != SBE; ++SBI) {
705d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    for (BasicBlock::iterator BBI = (*SBI)->begin();
715d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick         PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
725d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
735d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // Add a new PHI node to the prolog end block and add the
745d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // appropriate incoming values.
755d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
765d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                                       PrologEnd->getTerminator());
775d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // Adding a value to the new PHI node from the original loop preheader.
785d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // This is the value that skips all the prolog code.
795d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      if (L->contains(PN)) {
805d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
815d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      } else {
825d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
835d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      }
8453ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak
8553ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak      Value *V = PN->getIncomingValueForBlock(Latch);
865d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      if (Instruction *I = dyn_cast<Instruction>(V)) {
875d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        if (L->contains(I)) {
885d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick          V = LVMap[I];
895d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        }
905d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      }
915d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // Adding a value to the new PHI node from the last prolog block
925d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // that was created.
935d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      NewPN->addIncoming(V, LastPrologBB);
945d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
955d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // Update the existing PHI node operand with the value from the
965d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // new PHI node.  How this is done depends on if the existing
975d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // PHI node is in the original loop block, or the exit block.
985d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      if (L->contains(PN)) {
995d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
1005d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      } else {
1015d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        PN->addIncoming(NewPN, PrologEnd);
1025d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      }
1035d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    }
1045d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  }
1055d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
1065d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Create a branch around the orignal loop, which is taken if the
1075d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // trip count is less than the unroll factor.
1085d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  Instruction *InsertPt = PrologEnd->getTerminator();
1095d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  Instruction *BrLoopExit =
1105d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
1115d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                 ConstantInt::get(TripCount->getType(), Count));
1125d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *Exit = L->getUniqueExitBlock();
1135d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  assert(Exit != 0 && "Loop must have a single exit block only");
1145d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Split the exit to maintain loop canonicalization guarantees
1155d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
1165d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  if (!Exit->isLandingPad()) {
1172fac1d5d61a83c45dcf44119c41dce15ef10e9dcJakub Staszak    SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
1185d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  } else {
1195d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    SmallVector<BasicBlock*, 2> NewBBs;
1205d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
1215d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                                P, NewBBs);
1225d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  }
1235d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Add the branch to the exit block (around the unrolled loop)
1245d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
1255d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  InsertPt->eraseFromParent();
1265d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick}
1275d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
1285d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// Create a clone of the blocks in a loop and connect them together.
1295d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// This function doesn't create a clone of the loop structure.
1305d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///
1315d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// There are two value maps that are defined and used.  VMap is
1325d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// for the values in the current loop instance.  LVMap contains
1335d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// the values from the last loop instance.  We need the LVMap values
134d9b0b025612992a0b724eeca8bdf10b1d7a5c355Benjamin Kramer/// to update the initial values for the current loop instance.
1355d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///
1365d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trickstatic void CloneLoopBlocks(Loop *L,
1375d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                            bool FirstCopy,
1385d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                            BasicBlock *InsertTop,
1395d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                            BasicBlock *InsertBot,
1405d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                            std::vector<BasicBlock *> &NewBlocks,
1415d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                            LoopBlocksDFS &LoopBlocks,
1425d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                            ValueToValueMapTy &VMap,
1435d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                            ValueToValueMapTy &LVMap,
1445d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                            LoopInfo *LI) {
1455d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
1465d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *Preheader = L->getLoopPreheader();
1475d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *Header = L->getHeader();
1485d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *Latch = L->getLoopLatch();
1495d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  Function *F = Header->getParent();
1505d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
1515d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
1525d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // For each block in the original loop, create a new copy,
1535d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // and update the value map with the newly created values.
1545d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
1555d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
1565d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    NewBlocks.push_back(NewBB);
1575d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
1585d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    if (Loop *ParentLoop = L->getParentLoop())
1595d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
1605d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
1615d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    VMap[*BB] = NewBB;
1625d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    if (Header == *BB) {
1635d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // For the first block, add a CFG connection to this newly
1645d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // created block
1655d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      InsertTop->getTerminator()->setSuccessor(0, NewBB);
1665d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
1675d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // Change the incoming values to the ones defined in the
1685d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // previously cloned loop.
1695d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1705d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        PHINode *NewPHI = cast<PHINode>(VMap[I]);
1715d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        if (FirstCopy) {
1725d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick          // We replace the first phi node with the value from the preheader
1735d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick          VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
1745d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick          NewBB->getInstList().erase(NewPHI);
1755d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        } else {
1765d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick          // Update VMap with values from the previous block
1775d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick          unsigned idx = NewPHI->getBasicBlockIndex(Latch);
1785d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick          Value *InVal = NewPHI->getIncomingValue(idx);
1795d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick          if (Instruction *I = dyn_cast<Instruction>(InVal))
1805d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick            if (L->contains(I))
1815d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick              InVal = LVMap[InVal];
1825d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick          NewPHI->setIncomingValue(idx, InVal);
1835d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick          NewPHI->setIncomingBlock(idx, InsertTop);
1845d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        }
1855d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      }
1865d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    }
1875d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
1885d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    if (Latch == *BB) {
1895d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      VMap.erase((*BB)->getTerminator());
1905d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      NewBB->getTerminator()->eraseFromParent();
1915d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      BranchInst::Create(InsertBot, NewBB);
1925d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    }
1935d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  }
1945d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // LastValueMap is updated with the values for the current loop
1955d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // which are used the next time this function is called.
1965d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
1975d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick       VI != VE; ++VI) {
1985d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    LVMap[VI->first] = VI->second;
1995d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  }
2005d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick}
2015d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
2025d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// Insert code in the prolog code when unrolling a loop with a
2035d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// run-time trip-count.
2045d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///
2055d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// This method assumes that the loop unroll factor is total number
2065d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// of loop bodes in the loop after unrolling. (Some folks refer
2075d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// to the unroll factor as the number of *extra* copies added).
2085d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// We assume also that the loop unroll factor is a power-of-two. So, after
2095d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// unrolling the loop, the number of loop bodies executed is 2,
21053ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak/// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch
2115d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
2125d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// the switch instruction is generated.
2135d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///
2145d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    extraiters = tripcount % loopfactor
2155d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    if (extraiters == 0) jump Loop:
2165d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    if (extraiters == loopfactor) jump L1
2175d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    if (extraiters == loopfactor-1) jump L2
2185d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    ...
2195d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    L1:  LoopBody;
2205d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    L2:  LoopBody;
2215d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    ...
2225d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    if tripcount < loopfactor jump End
2235d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    Loop:
2245d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    ...
2255d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///    End:
2265d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick///
2275d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trickbool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
2285d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                                   LPPassManager *LPM) {
2295d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // for now, only unroll loops that contain a single exit
23053ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak  if (!L->getExitingBlock())
2315d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    return false;
2325d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
2335d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Make sure the loop is in canonical form, and there is a single
2345d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // exit block only.
2355d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
2365d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    return false;
2375d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
2385d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Use Scalar Evolution to compute the trip count.  This allows more
2395d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // loops to be unrolled than relying on induction var simplification
240b78d83d837fc72a6565f186b0ad92d48cdfdb805Andrew Trick  if (!LPM)
241b78d83d837fc72a6565f186b0ad92d48cdfdb805Andrew Trick    return false;
2425d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
2435d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  if (SE == 0)
2445d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    return false;
2455d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
2465d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Only unroll loops with a computable trip count and the trip count needs
2475d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // to be an int value (allowing a pointer type is a TODO item)
2485d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  const SCEV *BECount = SE->getBackedgeTakenCount(L);
2495d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
2505d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    return false;
2515d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
2525d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Add 1 since the backedge count doesn't include the first loop iteration
25353ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak  const SCEV *TripCountSC =
2545d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
2555d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  if (isa<SCEVCouldNotCompute>(TripCountSC))
2565d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    return false;
2575d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
2585d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // We only handle cases when the unroll factor is a power of 2.
2595d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Count is the loop unroll factor, the number of extra copies added + 1.
2605d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  if ((Count & (Count-1)) != 0)
2615d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    return false;
2625d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
2635d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // If this loop is nested, then the loop unroller changes the code in
2645d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // parent loop, so the Scalar Evolution pass needs to be run again
2655d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  if (Loop *ParentLoop = L->getParentLoop())
2665d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    SE->forgetLoop(ParentLoop);
2675d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
2685d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *PH = L->getLoopPreheader();
2695d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *Header = L->getHeader();
2705d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *Latch = L->getLoopLatch();
2715d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // It helps to splits the original preheader twice, one for the end of the
2725d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // prolog code and one for a new loop preheader
2735d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
2745d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
2755d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
2765d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
2775d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Compute the number of extra iterations required, which is:
2785d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  //  extra iterations = run-time trip count % (loop unroll factor + 1)
2795d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  SCEVExpander Expander(*SE, "loop-unroll");
2805d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
2815d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                                            PreHeaderBR);
2825d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  Type *CountTy = TripCount->getType();
2835d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BinaryOperator *ModVal =
2845d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    BinaryOperator::CreateURem(TripCount,
2855d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                               ConstantInt::get(CountTy, Count),
2865d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                               "xtraiter");
2875d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  ModVal->insertBefore(PreHeaderBR);
2885d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
2895d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Check if for no extra iterations, then jump to unrolled loop
2905d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  Value *BranchVal = new ICmpInst(PreHeaderBR,
2915d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                                  ICmpInst::ICMP_NE, ModVal,
2925d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                                  ConstantInt::get(CountTy, 0), "lcmp");
2935d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Branch to either the extra iterations or the unrolled loop
2945d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // We will fix up the true branch label when adding loop body copies
2955d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
29653ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak  assert(PreHeaderBR->isUnconditional() &&
29753ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak         PreHeaderBR->getSuccessor(0) == PEnd &&
2985d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick         "CFG edges in Preheader are not correct");
2995d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  PreHeaderBR->eraseFromParent();
3005d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
3015d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  ValueToValueMapTy LVMap;
3025d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  Function *F = Header->getParent();
3035d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // These variables are used to update the CFG links in each iteration
3045d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *CompareBB = 0;
3055d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  BasicBlock *LastLoopBB = PH;
3065d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Get an ordered list of blocks in the loop to help with the ordering of the
3075d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // cloned blocks in the prolog code
3085d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  LoopBlocksDFS LoopBlocks(L);
3095d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  LoopBlocks.perform(LI);
3105d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
3115d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  //
3125d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // For each extra loop iteration, create a copy of the loop's basic blocks
3135d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // and generate a condition that branches to the copy depending on the
3145d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // number of 'left over' iterations.
3155d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  //
3165d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
3175d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    std::vector<BasicBlock*> NewBlocks;
3185d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    ValueToValueMapTy VMap;
3195d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
3205d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    // Clone all the basic blocks in the loop, but we don't clone the loop
3215d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    // This function adds the appropriate CFG connections.
3225d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
3235d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                    LoopBlocks, VMap, LVMap, LI);
3245d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    LastLoopBB = cast<BasicBlock>(VMap[Latch]);
3255d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
3265d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    // Insert the cloned blocks into function just before the original loop
3275d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
3285d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                                  NewBlocks[0], F->end());
3295d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
3305d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    // Generate the code for the comparison which determines if the loop
3315d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    // prolog code needs to be executed.
3325d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    if (leftOverIters == Count-1) {
3335d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // There is no compare block for the fall-thru case when for the last
3345d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // left over iteration
3355d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      CompareBB = NewBlocks[0];
3365d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    } else {
3375d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // Create a new block for the comparison
3385d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
3395d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                                             F, CompareBB);
3405d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      if (Loop *ParentLoop = L->getParentLoop()) {
3415d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        // Add the new block to the parent loop, if needed
3425d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick        ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
3435d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      }
3445d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
3455d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // The comparison w/ the extra iteration value and branch
3465d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
3475d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                                      ConstantInt::get(CountTy, leftOverIters),
3485d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                                      "un.tmp");
3495d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      // Branch to either the extra iterations or the unrolled loop
3505d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      BranchInst::Create(NewBlocks[0], CompareBB,
3515d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                         BranchVal, NewBB);
3525d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      CompareBB = NewBB;
3535d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      PH->getTerminator()->setSuccessor(0, NewBB);
3545d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      VMap[NewPH] = CompareBB;
3555d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    }
3565d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
3575d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    // Rewrite the cloned instruction operands to use the values
3585d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    // created when the clone is created.
3595d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
3605d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
3615d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick             E = NewBlocks[i]->end(); I != E; ++I) {
36253ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak        RemapInstruction(I, VMap,
3635d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                         RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
3645d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick      }
3655d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick    }
3665d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  }
3675d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick
3685d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // Connect the prolog code to the original loop and update the
3695d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  // PHI functions.
3705d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
3715d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick                LPM->getAsPass());
3725d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  NumRuntimeUnrolled++;
3735d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick  return true;
3745d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick}
375