PPCCTRLoops.cpp revision b5f7b0f9780cd1bc6f948b194adfc57176d41711
1//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass identifies loops where we can generate the PPC branch instructions
11// that decrement and test the count register (CTR) (bdnz and friends).
12//
13// The pattern that defines the induction variable can change depending on
14// prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
15// normalizes induction variables, and the Loop Strength Reduction pass
16// run by 'llc' may also make changes to the induction variable.
17//
18// Criteria for CTR loops:
19//  - Countable loops (w/ ind. var for a trip count)
20//  - Try inner-most loops first
21//  - No nested CTR loops.
22//  - No function calls in loops.
23//
24//===----------------------------------------------------------------------===//
25
26#define DEBUG_TYPE "ctrloops"
27
28#include "llvm/Transforms/Scalar.h"
29#include "llvm/ADT/Statistic.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/Analysis/Dominators.h"
32#include "llvm/Analysis/LoopInfo.h"
33#include "llvm/Analysis/ScalarEvolutionExpander.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DerivedTypes.h"
36#include "llvm/IR/InlineAsm.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicInst.h"
39#include "llvm/IR/Module.h"
40#include "llvm/PassSupport.h"
41#include "llvm/Support/CommandLine.h"
42#include "llvm/Support/Debug.h"
43#include "llvm/Support/ValueHandle.h"
44#include "llvm/Support/raw_ostream.h"
45#include "llvm/Transforms/Utils/BasicBlockUtils.h"
46#include "llvm/Transforms/Utils/Local.h"
47#include "llvm/Transforms/Utils/LoopUtils.h"
48#include "llvm/Target/TargetLibraryInfo.h"
49#include "PPCTargetMachine.h"
50#include "PPC.h"
51
52#ifndef NDEBUG
53#include "llvm/CodeGen/MachineDominators.h"
54#include "llvm/CodeGen/MachineFunction.h"
55#include "llvm/CodeGen/MachineFunctionPass.h"
56#include "llvm/CodeGen/MachineRegisterInfo.h"
57#endif
58
59#include <algorithm>
60#include <vector>
61
62using namespace llvm;
63
64#ifndef NDEBUG
65static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
66#endif
67
68STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
69
70namespace llvm {
71  void initializePPCCTRLoopsPass(PassRegistry&);
72#ifndef NDEBUG
73  void initializePPCCTRLoopsVerifyPass(PassRegistry&);
74#endif
75}
76
77namespace {
78  struct PPCCTRLoops : public FunctionPass {
79
80#ifndef NDEBUG
81    static int Counter;
82#endif
83
84  public:
85    static char ID;
86
87    PPCCTRLoops() : FunctionPass(ID), TM(0) {
88      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
89    }
90    PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
91      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
92    }
93
94    virtual bool runOnFunction(Function &F);
95
96    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
97      AU.addRequired<LoopInfo>();
98      AU.addPreserved<LoopInfo>();
99      AU.addRequired<DominatorTree>();
100      AU.addPreserved<DominatorTree>();
101      AU.addRequired<ScalarEvolution>();
102    }
103
104  private:
105    bool mightUseCTR(const Triple &TT, BasicBlock *BB);
106    bool convertToCTRLoop(Loop *L);
107
108  private:
109    PPCTargetMachine *TM;
110    LoopInfo *LI;
111    ScalarEvolution *SE;
112    DataLayout *TD;
113    DominatorTree *DT;
114    const TargetLibraryInfo *LibInfo;
115  };
116
117  char PPCCTRLoops::ID = 0;
118#ifndef NDEBUG
119  int PPCCTRLoops::Counter = 0;
120#endif
121
122#ifndef NDEBUG
123  struct PPCCTRLoopsVerify : public MachineFunctionPass {
124  public:
125    static char ID;
126
127    PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
128      initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
129    }
130
131    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
132      AU.addRequired<MachineDominatorTree>();
133      MachineFunctionPass::getAnalysisUsage(AU);
134    }
135
136    virtual bool runOnMachineFunction(MachineFunction &MF);
137
138  private:
139    MachineDominatorTree *MDT;
140  };
141
142  char PPCCTRLoopsVerify::ID = 0;
143#endif // NDEBUG
144} // end anonymous namespace
145
146INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
147                      false, false)
148INITIALIZE_PASS_DEPENDENCY(DominatorTree)
149INITIALIZE_PASS_DEPENDENCY(LoopInfo)
150INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
151INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
152                    false, false)
153
154FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
155  return new PPCCTRLoops(TM);
156}
157
158#ifndef NDEBUG
159INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
160                      "PowerPC CTR Loops Verify", false, false)
161INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
162INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
163                    "PowerPC CTR Loops Verify", false, false)
164
165FunctionPass *llvm::createPPCCTRLoopsVerify() {
166  return new PPCCTRLoopsVerify();
167}
168#endif // NDEBUG
169
170bool PPCCTRLoops::runOnFunction(Function &F) {
171  LI = &getAnalysis<LoopInfo>();
172  SE = &getAnalysis<ScalarEvolution>();
173  DT = &getAnalysis<DominatorTree>();
174  TD = getAnalysisIfAvailable<DataLayout>();
175  LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
176
177  bool MadeChange = false;
178
179  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
180       I != E; ++I) {
181    Loop *L = *I;
182    if (!L->getParentLoop())
183      MadeChange |= convertToCTRLoop(L);
184  }
185
186  return MadeChange;
187}
188
189bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
190  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
191       J != JE; ++J) {
192    if (CallInst *CI = dyn_cast<CallInst>(J)) {
193      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
194        // Inline ASM is okay, unless it clobbers the ctr register.
195        InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
196        for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
197          InlineAsm::ConstraintInfo &C = CIV[i];
198          if (C.Type != InlineAsm::isInput)
199            for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
200              if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
201                return true;
202        }
203
204        continue;
205      }
206
207      if (!TM)
208        return true;
209      const TargetLowering *TLI = TM->getTargetLowering();
210
211      if (Function *F = CI->getCalledFunction()) {
212        // Most intrinsics don't become function calls, but some might.
213        // sin, cos, exp and log are always calls.
214        unsigned Opcode;
215        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
216          switch (F->getIntrinsicID()) {
217          default: continue;
218
219// VisualStudio defines setjmp as _setjmp
220#if defined(_MSC_VER) && defined(setjmp) && \
221                       !defined(setjmp_undefined_for_msvc)
222#  pragma push_macro("setjmp")
223#  undef setjmp
224#  define setjmp_undefined_for_msvc
225#endif
226
227          case Intrinsic::setjmp:
228
229#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
230 // let's return it to _setjmp state
231#  pragma pop_macro("setjmp")
232#  undef setjmp_undefined_for_msvc
233#endif
234
235          case Intrinsic::longjmp:
236          case Intrinsic::memcpy:
237          case Intrinsic::memmove:
238          case Intrinsic::memset:
239          case Intrinsic::powi:
240          case Intrinsic::log:
241          case Intrinsic::log2:
242          case Intrinsic::log10:
243          case Intrinsic::exp:
244          case Intrinsic::exp2:
245          case Intrinsic::pow:
246          case Intrinsic::sin:
247          case Intrinsic::cos:
248            return true;
249          case Intrinsic::sqrt:      Opcode = ISD::FSQRT;      break;
250          case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
251          case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
252          case Intrinsic::trunc:     Opcode = ISD::FTRUNC;     break;
253          case Intrinsic::rint:      Opcode = ISD::FRINT;      break;
254          case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
255          }
256        }
257
258        // PowerPC does not use [US]DIVREM or other library calls for
259        // operations on regular types which are not otherwise library calls
260        // (i.e. soft float or atomics). If adapting for targets that do,
261        // additional care is required here.
262
263        LibFunc::Func Func;
264        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
265            LibInfo->getLibFunc(F->getName(), Func) &&
266            LibInfo->hasOptimizedCodeGen(Func)) {
267          // Non-read-only functions are never treated as intrinsics.
268          if (!CI->onlyReadsMemory())
269            return true;
270
271          // Conversion happens only for FP calls.
272          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
273            return true;
274
275          switch (Func) {
276          default: return true;
277          case LibFunc::copysign:
278          case LibFunc::copysignf:
279          case LibFunc::copysignl:
280            continue; // ISD::FCOPYSIGN is never a library call.
281          case LibFunc::fabs:
282          case LibFunc::fabsf:
283          case LibFunc::fabsl:
284            continue; // ISD::FABS is never a library call.
285          case LibFunc::sqrt:
286          case LibFunc::sqrtf:
287          case LibFunc::sqrtl:
288            Opcode = ISD::FSQRT; break;
289          case LibFunc::floor:
290          case LibFunc::floorf:
291          case LibFunc::floorl:
292            Opcode = ISD::FFLOOR; break;
293          case LibFunc::nearbyint:
294          case LibFunc::nearbyintf:
295          case LibFunc::nearbyintl:
296            Opcode = ISD::FNEARBYINT; break;
297          case LibFunc::ceil:
298          case LibFunc::ceilf:
299          case LibFunc::ceill:
300            Opcode = ISD::FCEIL; break;
301          case LibFunc::rint:
302          case LibFunc::rintf:
303          case LibFunc::rintl:
304            Opcode = ISD::FRINT; break;
305          case LibFunc::trunc:
306          case LibFunc::truncf:
307          case LibFunc::truncl:
308            Opcode = ISD::FTRUNC; break;
309          }
310
311          MVT VTy =
312            TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
313          if (VTy == MVT::Other)
314            return true;
315
316          if (TLI->isOperationLegalOrCustom(Opcode, VTy))
317            continue;
318          else if (VTy.isVector() &&
319                   TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
320            continue;
321
322          return true;
323        }
324      }
325
326      return true;
327    } else if (isa<BinaryOperator>(J) &&
328               J->getType()->getScalarType()->isPPC_FP128Ty()) {
329      // Most operations on ppc_f128 values become calls.
330      return true;
331    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
332               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
333      CastInst *CI = cast<CastInst>(J);
334      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
335          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
336          (TT.isArch32Bit() &&
337           (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
338            CI->getDestTy()->getScalarType()->isIntegerTy(64))
339          ))
340        return true;
341    } else if (TT.isArch32Bit() &&
342               J->getType()->getScalarType()->isIntegerTy(64) &&
343               (J->getOpcode() == Instruction::UDiv ||
344                J->getOpcode() == Instruction::SDiv ||
345                J->getOpcode() == Instruction::URem ||
346                J->getOpcode() == Instruction::SRem)) {
347      return true;
348    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
349      // On PowerPC, indirect jumps use the counter register.
350      return true;
351    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
352      if (!TM)
353        return true;
354      const TargetLowering *TLI = TM->getTargetLowering();
355
356      if (TLI->supportJumpTables() &&
357          SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
358        return true;
359    }
360  }
361
362  return false;
363}
364
365bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
366  bool MadeChange = false;
367
368  Triple TT = Triple(L->getHeader()->getParent()->getParent()->
369                     getTargetTriple());
370  if (!TT.isArch32Bit() && !TT.isArch64Bit())
371    return MadeChange; // Unknown arch. type.
372
373  // Process nested loops first.
374  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
375    MadeChange |= convertToCTRLoop(*I);
376  }
377
378  // If a nested loop has been converted, then we can't convert this loop.
379  if (MadeChange)
380    return MadeChange;
381
382#ifndef NDEBUG
383  // Stop trying after reaching the limit (if any).
384  int Limit = CTRLoopLimit;
385  if (Limit >= 0) {
386    if (Counter >= CTRLoopLimit)
387      return false;
388    Counter++;
389  }
390#endif
391
392  // We don't want to spill/restore the counter register, and so we don't
393  // want to use the counter register if the loop contains calls.
394  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
395       I != IE; ++I)
396    if (mightUseCTR(TT, *I))
397      return MadeChange;
398
399  SmallVector<BasicBlock*, 4> ExitingBlocks;
400  L->getExitingBlocks(ExitingBlocks);
401
402  BasicBlock *CountedExitBlock = 0;
403  const SCEV *ExitCount = 0;
404  BranchInst *CountedExitBranch = 0;
405  for (SmallVector<BasicBlock*, 4>::iterator I = ExitingBlocks.begin(),
406       IE = ExitingBlocks.end(); I != IE; ++I) {
407    const SCEV *EC = SE->getExitCount(L, *I);
408    DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
409                    (*I)->getName() << ": " << *EC << "\n");
410    if (isa<SCEVCouldNotCompute>(EC))
411      continue;
412    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
413      if (ConstEC->getValue()->isZero())
414        continue;
415    } else if (!SE->isLoopInvariant(EC, L))
416      continue;
417
418    if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
419      continue;
420
421    // We now have a loop-invariant count of loop iterations (which is not the
422    // constant zero) for which we know that this loop will not exit via this
423    // exisiting block.
424
425    // We need to make sure that this block will run on every loop iteration.
426    // For this to be true, we must dominate all blocks with backedges. Such
427    // blocks are in-loop predecessors to the header block.
428    bool NotAlways = false;
429    for (pred_iterator PI = pred_begin(L->getHeader()),
430         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
431      if (!L->contains(*PI))
432        continue;
433
434      if (!DT->dominates(*I, *PI)) {
435        NotAlways = true;
436        break;
437      }
438    }
439
440    if (NotAlways)
441      continue;
442
443    // Make sure this blocks ends with a conditional branch.
444    Instruction *TI = (*I)->getTerminator();
445    if (!TI)
446      continue;
447
448    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
449      if (!BI->isConditional())
450        continue;
451
452      CountedExitBranch = BI;
453    } else
454      continue;
455
456    // Note that this block may not be the loop latch block, even if the loop
457    // has a latch block.
458    CountedExitBlock = *I;
459    ExitCount = EC;
460    break;
461  }
462
463  if (!CountedExitBlock)
464    return MadeChange;
465
466  BasicBlock *Preheader = L->getLoopPreheader();
467
468  // If we don't have a preheader, then insert one. If we already have a
469  // preheader, then we can use it (except if the preheader contains a use of
470  // the CTR register because some such uses might be reordered by the
471  // selection DAG after the mtctr instruction).
472  if (!Preheader || mightUseCTR(TT, Preheader))
473    Preheader = InsertPreheaderForLoop(L, this);
474  if (!Preheader)
475    return MadeChange;
476
477  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");
478
479  // Insert the count into the preheader and replace the condition used by the
480  // selected branch.
481  MadeChange = true;
482
483  SCEVExpander SCEVE(*SE, "loopcnt");
484  LLVMContext &C = SE->getContext();
485  Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
486                                       Type::getInt32Ty(C);
487  if (!ExitCount->getType()->isPointerTy() &&
488      ExitCount->getType() != CountType)
489    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
490  ExitCount = SE->getAddExpr(ExitCount,
491                             SE->getConstant(CountType, 1));
492  Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
493                                       Preheader->getTerminator());
494
495  IRBuilder<> CountBuilder(Preheader->getTerminator());
496  Module *M = Preheader->getParent()->getParent();
497  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
498                                               CountType);
499  CountBuilder.CreateCall(MTCTRFunc, ECValue);
500
501  IRBuilder<> CondBuilder(CountedExitBranch);
502  Value *DecFunc =
503    Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
504  Value *NewCond = CondBuilder.CreateCall(DecFunc);
505  Value *OldCond = CountedExitBranch->getCondition();
506  CountedExitBranch->setCondition(NewCond);
507
508  // The false branch must exit the loop.
509  if (!L->contains(CountedExitBranch->getSuccessor(0)))
510    CountedExitBranch->swapSuccessors();
511
512  // The old condition may be dead now, and may have even created a dead PHI
513  // (the original induction variable).
514  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
515  DeleteDeadPHIs(CountedExitBlock);
516
517  ++NumCTRLoops;
518  return MadeChange;
519}
520
521#ifndef NDEBUG
522static bool clobbersCTR(const MachineInstr *MI) {
523  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
524    const MachineOperand &MO = MI->getOperand(i);
525    if (MO.isReg()) {
526      if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
527        return true;
528    } else if (MO.isRegMask()) {
529      if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
530        return true;
531    }
532  }
533
534  return false;
535}
536
537static bool verifyCTRBranch(MachineBasicBlock *MBB,
538                            MachineBasicBlock::iterator I) {
539  MachineBasicBlock::iterator BI = I;
540  SmallSet<MachineBasicBlock *, 16>   Visited;
541  SmallVector<MachineBasicBlock *, 8> Preds;
542  bool CheckPreds;
543
544  if (I == MBB->begin()) {
545    Visited.insert(MBB);
546    goto queue_preds;
547  } else
548    --I;
549
550check_block:
551  Visited.insert(MBB);
552  if (I == MBB->end())
553    goto queue_preds;
554
555  CheckPreds = true;
556  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
557    unsigned Opc = I->getOpcode();
558    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
559      CheckPreds = false;
560      break;
561    }
562
563    if (I != BI && clobbersCTR(I)) {
564      DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" <<
565                      MBB->getFullName() << ") instruction " << *I <<
566                      " clobbers CTR, invalidating " << "BB#" <<
567                      BI->getParent()->getNumber() << " (" <<
568                      BI->getParent()->getFullName() << ") instruction " <<
569                      *BI << "\n");
570      return false;
571    }
572
573    if (I == IE)
574      break;
575  }
576
577  if (!CheckPreds && Preds.empty())
578    return true;
579
580  if (CheckPreds) {
581queue_preds:
582    if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
583      DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" <<
584                      BI->getParent()->getNumber() << " (" <<
585                      BI->getParent()->getFullName() << ") instruction " <<
586                      *BI << "\n");
587      return false;
588    }
589
590    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
591         PIE = MBB->pred_end(); PI != PIE; ++PI)
592      Preds.push_back(*PI);
593  }
594
595  do {
596    MBB = Preds.pop_back_val();
597    if (!Visited.count(MBB)) {
598      I = MBB->getLastNonDebugInstr();
599      goto check_block;
600    }
601  } while (!Preds.empty());
602
603  return true;
604}
605
606bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
607  MDT = &getAnalysis<MachineDominatorTree>();
608
609  // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
610  // any other instructions that might clobber the ctr register.
611  for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
612       I != IE; ++I) {
613    MachineBasicBlock *MBB = I;
614    if (!MDT->isReachableFromEntry(MBB))
615      continue;
616
617    for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
618      MIIE = MBB->end(); MII != MIIE; ++MII) {
619      unsigned Opc = MII->getOpcode();
620      if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
621          Opc == PPC::BDZ8  || Opc == PPC::BDZ)
622        if (!verifyCTRBranch(MBB, MII))
623          llvm_unreachable("Invalid PPC CTR loop!");
624    }
625  }
626
627  return false;
628}
629#endif // NDEBUG
630
631