15d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// 25d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// 35d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// The LLVM Compiler Infrastructure 45d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// 55d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// This file is distributed under the University of Illinois Open Source 65d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// License. See LICENSE.TXT for details. 75d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// 85d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//===----------------------------------------------------------------------===// 95d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// 105d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// This file implements some loop unrolling utilities for loops with run-time 115d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time 125d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// trip counts. 135d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// 1453ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak// The functions in this file are used to generate extra code when the 155d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// run-time trip count modulo the unroll factor is not 0. When this is the 165d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// case, we need to generate code to execute these 'left over' iterations. 175d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// 1853ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak// The current strategy generates an if-then-else sequence prior to the 195d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// unrolled loop to execute the 'left over' iterations. Other strategies 205d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// include generate a loop before or after the unrolled loop. 215d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick// 225d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick//===----------------------------------------------------------------------===// 235d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 245d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#define DEBUG_TYPE "loop-unroll" 255d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Transforms/Utils/UnrollLoop.h" 265d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/BasicBlock.h" 275d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/ADT/Statistic.h" 285d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Analysis/LoopIterator.h" 295d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Analysis/LoopPass.h" 305d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Analysis/ScalarEvolution.h" 315d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Analysis/ScalarEvolutionExpander.h" 325d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Support/Debug.h" 335d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Support/raw_ostream.h" 345d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Transforms/Utils/BasicBlockUtils.h" 355d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include "llvm/Transforms/Utils/Cloning.h" 365d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick#include <algorithm> 375d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 385d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trickusing namespace llvm; 395d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 4053ce4286469b7a565674dc253d9e69b3cfeb7054Jakub StaszakSTATISTIC(NumRuntimeUnrolled, 415d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick "Number of loops unrolled with run-time trip counts"); 425d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 435d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// Connect the unrolling prolog code to the original loop. 445d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// The unrolling prolog code contains code to execute the 455d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// 'extra' iterations if the run-time trip count modulo the 465d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// unroll count is non-zero. 475d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// 485d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// This function performs the following: 495d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// - Create PHI nodes at prolog end block to combine values 505d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// that exit the prolog code and jump around the prolog. 515d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// - Add a PHI operand to a PHI node at the loop exit block 525d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// for values that exit the prolog and go around the loop. 535d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// - Branch around the original loop if the trip count is less 545d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// than the unroll factor. 555d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// 565d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trickstatic void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, 575d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *LastPrologBB, BasicBlock *PrologEnd, 585d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *OrigPH, BasicBlock *NewPH, 595d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ValueToValueMapTy &LVMap, Pass *P) { 605d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *Latch = L->getLoopLatch(); 615d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick assert(Latch != 0 && "Loop must have a latch"); 625d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 635d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Create a PHI node for each outgoing value from the original loop 645d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // (which means it is an outgoing value from the prolog code too). 655d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // The new PHI node is inserted in the prolog end basic block. 665d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // The new PHI name is added as an operand of a PHI node in either 675d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // the loop header or the loop exit block. 685d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch); 695d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick SBI != SBE; ++SBI) { 705d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick for (BasicBlock::iterator BBI = (*SBI)->begin(); 715d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { 725d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 735d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Add a new PHI node to the prolog end block and add the 745d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // appropriate incoming values. 755d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr", 765d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick PrologEnd->getTerminator()); 775d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Adding a value to the new PHI node from the original loop preheader. 785d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // This is the value that skips all the prolog code. 795d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (L->contains(PN)) { 805d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH); 815d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } else { 825d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH); 835d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 8453ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak 8553ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak Value *V = PN->getIncomingValueForBlock(Latch); 865d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (Instruction *I = dyn_cast<Instruction>(V)) { 875d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (L->contains(I)) { 885d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick V = LVMap[I]; 895d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 905d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 915d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Adding a value to the new PHI node from the last prolog block 925d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // that was created. 935d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick NewPN->addIncoming(V, LastPrologBB); 945d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 955d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Update the existing PHI node operand with the value from the 965d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // new PHI node. How this is done depends on if the existing 975d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // PHI node is in the original loop block, or the exit block. 985d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (L->contains(PN)) { 995d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN); 1005d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } else { 1015d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick PN->addIncoming(NewPN, PrologEnd); 1025d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 1035d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 1045d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 1055d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 1065d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Create a branch around the orignal loop, which is taken if the 1075d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // trip count is less than the unroll factor. 1085d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick Instruction *InsertPt = PrologEnd->getTerminator(); 1095d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick Instruction *BrLoopExit = 1105d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount, 1115d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ConstantInt::get(TripCount->getType(), Count)); 1125d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *Exit = L->getUniqueExitBlock(); 1135d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick assert(Exit != 0 && "Loop must have a single exit block only"); 1145d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Split the exit to maintain loop canonicalization guarantees 1155d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); 1165d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (!Exit->isLandingPad()) { 1172fac1d5d61a83c45dcf44119c41dce15ef10e9dcJakub Staszak SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P); 1185d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } else { 1195d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick SmallVector<BasicBlock*, 2> NewBBs; 1205d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa", 1215d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick P, NewBBs); 1225d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 1235d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Add the branch to the exit block (around the unrolled loop) 1245d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt); 1255d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick InsertPt->eraseFromParent(); 1265d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick} 1275d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 1285d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// Create a clone of the blocks in a loop and connect them together. 1295d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// This function doesn't create a clone of the loop structure. 1305d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// 1315d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// There are two value maps that are defined and used. VMap is 1325d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// for the values in the current loop instance. LVMap contains 1335d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// the values from the last loop instance. We need the LVMap values 134d9b0b025612992a0b724eeca8bdf10b1d7a5c355Benjamin Kramer/// to update the initial values for the current loop instance. 1355d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// 1365d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trickstatic void CloneLoopBlocks(Loop *L, 1375d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick bool FirstCopy, 1385d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *InsertTop, 1395d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *InsertBot, 1405d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick std::vector<BasicBlock *> &NewBlocks, 1415d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick LoopBlocksDFS &LoopBlocks, 1425d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ValueToValueMapTy &VMap, 1435d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ValueToValueMapTy &LVMap, 1445d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick LoopInfo *LI) { 1455d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 1465d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *Preheader = L->getLoopPreheader(); 1475d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *Header = L->getHeader(); 1485d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *Latch = L->getLoopLatch(); 1495d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick Function *F = Header->getParent(); 1505d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); 1515d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); 1525d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // For each block in the original loop, create a new copy, 1535d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // and update the value map with the newly created values. 1545d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { 1555d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F); 1565d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick NewBlocks.push_back(NewBB); 1575d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 1585d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (Loop *ParentLoop = L->getParentLoop()) 1595d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); 1605d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 1615d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick VMap[*BB] = NewBB; 1625d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (Header == *BB) { 1635d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // For the first block, add a CFG connection to this newly 1645d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // created block 1655d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick InsertTop->getTerminator()->setSuccessor(0, NewBB); 1665d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 1675d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Change the incoming values to the ones defined in the 1685d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // previously cloned loop. 1695d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { 1705d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick PHINode *NewPHI = cast<PHINode>(VMap[I]); 1715d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (FirstCopy) { 1725d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // We replace the first phi node with the value from the preheader 1735d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); 1745d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick NewBB->getInstList().erase(NewPHI); 1755d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } else { 1765d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Update VMap with values from the previous block 1775d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick unsigned idx = NewPHI->getBasicBlockIndex(Latch); 1785d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick Value *InVal = NewPHI->getIncomingValue(idx); 1795d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (Instruction *I = dyn_cast<Instruction>(InVal)) 1805d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (L->contains(I)) 1815d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick InVal = LVMap[InVal]; 1825d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick NewPHI->setIncomingValue(idx, InVal); 1835d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick NewPHI->setIncomingBlock(idx, InsertTop); 1845d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 1855d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 1865d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 1875d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 1885d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (Latch == *BB) { 1895d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick VMap.erase((*BB)->getTerminator()); 1905d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick NewBB->getTerminator()->eraseFromParent(); 1915d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BranchInst::Create(InsertBot, NewBB); 1925d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 1935d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 1945d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // LastValueMap is updated with the values for the current loop 1955d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // which are used the next time this function is called. 1965d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); 1975d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick VI != VE; ++VI) { 1985d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick LVMap[VI->first] = VI->second; 1995d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 2005d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick} 2015d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 2025d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// Insert code in the prolog code when unrolling a loop with a 2035d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// run-time trip-count. 2045d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// 2055d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// This method assumes that the loop unroll factor is total number 2065d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// of loop bodes in the loop after unrolling. (Some folks refer 2075d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// to the unroll factor as the number of *extra* copies added). 2085d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// We assume also that the loop unroll factor is a power-of-two. So, after 2095d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// unrolling the loop, the number of loop bodies executed is 2, 21053ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak/// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch 2115d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for 2125d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// the switch instruction is generated. 2135d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// 2145d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// extraiters = tripcount % loopfactor 2155d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// if (extraiters == 0) jump Loop: 2165d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// if (extraiters == loopfactor) jump L1 2175d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// if (extraiters == loopfactor-1) jump L2 2185d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// ... 2195d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// L1: LoopBody; 2205d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// L2: LoopBody; 2215d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// ... 2225d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// if tripcount < loopfactor jump End 2235d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// Loop: 2245d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// ... 2255d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// End: 2265d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick/// 2275d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trickbool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, 2285d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick LPPassManager *LPM) { 2295d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // for now, only unroll loops that contain a single exit 23053ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak if (!L->getExitingBlock()) 2315d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick return false; 2325d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 2335d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Make sure the loop is in canonical form, and there is a single 2345d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // exit block only. 2355d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0) 2365d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick return false; 2375d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 2385d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Use Scalar Evolution to compute the trip count. This allows more 2395d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // loops to be unrolled than relying on induction var simplification 240b78d83d837fc72a6565f186b0ad92d48cdfdb805Andrew Trick if (!LPM) 241b78d83d837fc72a6565f186b0ad92d48cdfdb805Andrew Trick return false; 2425d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); 2435d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (SE == 0) 2445d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick return false; 2455d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 2465d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Only unroll loops with a computable trip count and the trip count needs 2475d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // to be an int value (allowing a pointer type is a TODO item) 2485d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick const SCEV *BECount = SE->getBackedgeTakenCount(L); 2495d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy()) 2505d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick return false; 2515d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 2525d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Add 1 since the backedge count doesn't include the first loop iteration 25353ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak const SCEV *TripCountSC = 2545d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1)); 2555d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (isa<SCEVCouldNotCompute>(TripCountSC)) 2565d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick return false; 2575d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 2585d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // We only handle cases when the unroll factor is a power of 2. 2595d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Count is the loop unroll factor, the number of extra copies added + 1. 2605d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if ((Count & (Count-1)) != 0) 2615d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick return false; 2625d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 2635d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // If this loop is nested, then the loop unroller changes the code in 2645d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // parent loop, so the Scalar Evolution pass needs to be run again 2655d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (Loop *ParentLoop = L->getParentLoop()) 2665d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick SE->forgetLoop(ParentLoop); 2675d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 2685d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *PH = L->getLoopPreheader(); 2695d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *Header = L->getHeader(); 2705d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *Latch = L->getLoopLatch(); 2715d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // It helps to splits the original preheader twice, one for the end of the 2725d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // prolog code and one for a new loop preheader 2735d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass()); 2745d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass()); 2755d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); 2765d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 2775d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Compute the number of extra iterations required, which is: 2785d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // extra iterations = run-time trip count % (loop unroll factor + 1) 2795d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick SCEVExpander Expander(*SE, "loop-unroll"); 2805d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), 2815d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick PreHeaderBR); 2825d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick Type *CountTy = TripCount->getType(); 2835d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BinaryOperator *ModVal = 2845d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BinaryOperator::CreateURem(TripCount, 2855d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ConstantInt::get(CountTy, Count), 2865d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick "xtraiter"); 2875d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ModVal->insertBefore(PreHeaderBR); 2885d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 2895d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Check if for no extra iterations, then jump to unrolled loop 2905d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick Value *BranchVal = new ICmpInst(PreHeaderBR, 2915d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ICmpInst::ICMP_NE, ModVal, 2925d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ConstantInt::get(CountTy, 0), "lcmp"); 2935d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Branch to either the extra iterations or the unrolled loop 2945d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // We will fix up the true branch label when adding loop body copies 2955d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR); 29653ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak assert(PreHeaderBR->isUnconditional() && 29753ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak PreHeaderBR->getSuccessor(0) == PEnd && 2985d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick "CFG edges in Preheader are not correct"); 2995d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick PreHeaderBR->eraseFromParent(); 3005d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 3015d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ValueToValueMapTy LVMap; 3025d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick Function *F = Header->getParent(); 3035d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // These variables are used to update the CFG links in each iteration 3045d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *CompareBB = 0; 3055d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *LastLoopBB = PH; 3065d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Get an ordered list of blocks in the loop to help with the ordering of the 3075d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // cloned blocks in the prolog code 3085d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick LoopBlocksDFS LoopBlocks(L); 3095d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick LoopBlocks.perform(LI); 3105d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 3115d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // 3125d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // For each extra loop iteration, create a copy of the loop's basic blocks 3135d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // and generate a condition that branches to the copy depending on the 3145d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // number of 'left over' iterations. 3155d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // 3165d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) { 3175d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick std::vector<BasicBlock*> NewBlocks; 3185d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ValueToValueMapTy VMap; 3195d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 3205d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Clone all the basic blocks in the loop, but we don't clone the loop 3215d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // This function adds the appropriate CFG connections. 3225d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks, 3235d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick LoopBlocks, VMap, LVMap, LI); 3245d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick LastLoopBB = cast<BasicBlock>(VMap[Latch]); 3255d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 3265d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Insert the cloned blocks into function just before the original loop 3275d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), 3285d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick NewBlocks[0], F->end()); 3295d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 3305d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Generate the code for the comparison which determines if the loop 3315d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // prolog code needs to be executed. 3325d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (leftOverIters == Count-1) { 3335d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // There is no compare block for the fall-thru case when for the last 3345d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // left over iteration 3355d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick CompareBB = NewBlocks[0]; 3365d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } else { 3375d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Create a new block for the comparison 3385d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp", 3395d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick F, CompareBB); 3405d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick if (Loop *ParentLoop = L->getParentLoop()) { 3415d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Add the new block to the parent loop, if needed 3425d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); 3435d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 3445d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 3455d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // The comparison w/ the extra iteration value and branch 3465d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal, 3475d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ConstantInt::get(CountTy, leftOverIters), 3485d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick "un.tmp"); 3495d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Branch to either the extra iterations or the unrolled loop 3505d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BranchInst::Create(NewBlocks[0], CompareBB, 3515d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick BranchVal, NewBB); 3525d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick CompareBB = NewBB; 3535d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick PH->getTerminator()->setSuccessor(0, NewBB); 3545d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick VMap[NewPH] = CompareBB; 3555d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 3565d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 3575d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Rewrite the cloned instruction operands to use the values 3585d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // created when the clone is created. 3595d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) { 3605d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick for (BasicBlock::iterator I = NewBlocks[i]->begin(), 3615d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick E = NewBlocks[i]->end(); I != E; ++I) { 36253ce4286469b7a565674dc253d9e69b3cfeb7054Jakub Staszak RemapInstruction(I, VMap, 3635d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); 3645d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 3655d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 3665d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick } 3675d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick 3685d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // Connect the prolog code to the original loop and update the 3695d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick // PHI functions. 3705d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap, 3715d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick LPM->getAsPass()); 3725d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick NumRuntimeUnrolled++; 3735d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick return true; 3745d73448bb7f3d326f310e6f35030821b103b1cdbAndrew Trick} 375