SystemZLongBranch.cpp revision 6824f127f90197b26af93cf5d6c13b7941567e54
1//===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass does two things:
11// (1) fuse compares and branches into COMPARE AND BRANCH instructions
12// (2) make sure that all branches are in range.
13//
14// We do (1) here rather than earlier because the fused form prevents
15// predication.
16//
17// Doing it so late makes it more likely that a register will be reused
18// between the compare and the branch, but it isn't clear whether preventing
19// that would be a win or not.
20//
21// There are several ways in which (2) could be done.  One aggressive
22// approach is to assume that all branches are in range and successively
23// replace those that turn out not to be in range with a longer form
24// (branch relaxation).  A simple implementation is to continually walk
25// through the function relaxing branches until no more changes are
26// needed and a fixed point is reached.  However, in the pathological
27// worst case, this implementation is quadratic in the number of blocks;
28// relaxing branch N can make branch N-1 go out of range, which in turn
29// can make branch N-2 go out of range, and so on.
30//
31// An alternative approach is to assume that all branches must be
32// converted to their long forms, then reinstate the short forms of
33// branches that, even under this pessimistic assumption, turn out to be
34// in range (branch shortening).  This too can be implemented as a function
35// walk that is repeated until a fixed point is reached.  In general,
36// the result of shortening is not as good as that of relaxation, and
37// shortening is also quadratic in the worst case; shortening branch N
38// can bring branch N-1 in range of the short form, which in turn can do
39// the same for branch N-2, and so on.  The main advantage of shortening
40// is that each walk through the function produces valid code, so it is
41// possible to stop at any point after the first walk.  The quadraticness
42// could therefore be handled with a maximum pass count, although the
43// question then becomes: what maximum count should be used?
44//
45// On SystemZ, long branches are only needed for functions bigger than 64k,
46// which are relatively rare to begin with, and the long branch sequences
47// are actually relatively cheap.  It therefore doesn't seem worth spending
48// much compilation time on the problem.  Instead, the approach we take is:
49//
50// (1) Work out the address that each block would have if no branches
51//     need relaxing.  Exit the pass early if all branches are in range
52//     according to this assumption.
53//
54// (2) Work out the address that each block would have if all branches
55//     need relaxing.
56//
57// (3) Walk through the block calculating the final address of each instruction
58//     and relaxing those that need to be relaxed.  For backward branches,
59//     this check uses the final address of the target block, as calculated
60//     earlier in the walk.  For forward branches, this check uses the
61//     address of the target block that was calculated in (2).  Both checks
62//     give a conservatively-correct range.
63//
64//===----------------------------------------------------------------------===//
65
66#define DEBUG_TYPE "systemz-long-branch"
67
68#include "SystemZTargetMachine.h"
69#include "llvm/ADT/Statistic.h"
70#include "llvm/CodeGen/MachineFunctionPass.h"
71#include "llvm/CodeGen/MachineInstrBuilder.h"
72#include "llvm/IR/Function.h"
73#include "llvm/Support/CommandLine.h"
74#include "llvm/Support/MathExtras.h"
75#include "llvm/Target/TargetInstrInfo.h"
76#include "llvm/Target/TargetMachine.h"
77#include "llvm/Target/TargetRegisterInfo.h"
78
79using namespace llvm;
80
81STATISTIC(LongBranches, "Number of long branches.");
82
83namespace {
84  typedef MachineBasicBlock::iterator Iter;
85
86  // Represents positional information about a basic block.
87  struct MBBInfo {
88    // The address that we currently assume the block has.
89    uint64_t Address;
90
91    // The size of the block in bytes, excluding terminators.
92    // This value never changes.
93    uint64_t Size;
94
95    // The minimum alignment of the block, as a log2 value.
96    // This value never changes.
97    unsigned Alignment;
98
99    // The number of terminators in this block.  This value never changes.
100    unsigned NumTerminators;
101
102    MBBInfo()
103      : Address(0), Size(0), Alignment(0), NumTerminators(0) {}
104  };
105
106  // Represents the state of a block terminator.
107  struct TerminatorInfo {
108    // If this terminator is a relaxable branch, this points to the branch
109    // instruction, otherwise it is null.
110    MachineInstr *Branch;
111
112    // The address that we currently assume the terminator has.
113    uint64_t Address;
114
115    // The current size of the terminator in bytes.
116    uint64_t Size;
117
118    // If Branch is nonnull, this is the number of the target block,
119    // otherwise it is unused.
120    unsigned TargetBlock;
121
122    // If Branch is nonnull, this is the length of the longest relaxed form,
123    // otherwise it is zero.
124    unsigned ExtraRelaxSize;
125
126    TerminatorInfo() : Branch(0), Size(0), TargetBlock(0), ExtraRelaxSize(0) {}
127  };
128
129  // Used to keep track of the current position while iterating over the blocks.
130  struct BlockPosition {
131    // The address that we assume this position has.
132    uint64_t Address;
133
134    // The number of low bits in Address that are known to be the same
135    // as the runtime address.
136    unsigned KnownBits;
137
138    BlockPosition(unsigned InitialAlignment)
139      : Address(0), KnownBits(InitialAlignment) {}
140  };
141
142  class SystemZLongBranch : public MachineFunctionPass {
143  public:
144    static char ID;
145    SystemZLongBranch(const SystemZTargetMachine &tm)
146      : MachineFunctionPass(ID), TII(0) {}
147
148    virtual const char *getPassName() const {
149      return "SystemZ Long Branch";
150    }
151
152    bool runOnMachineFunction(MachineFunction &F);
153
154  private:
155    void skipNonTerminators(BlockPosition &Position, MBBInfo &Block);
156    void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator,
157                        bool AssumeRelaxed);
158    TerminatorInfo describeTerminator(MachineInstr *MI);
159    bool fuseCompareAndBranch(MachineInstr *Compare);
160    uint64_t initMBBInfo();
161    bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
162    bool mustRelaxABranch();
163    void setWorstCaseAddresses();
164    void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode);
165    void relaxBranch(TerminatorInfo &Terminator);
166    void relaxBranches();
167
168    const SystemZInstrInfo *TII;
169    MachineFunction *MF;
170    SmallVector<MBBInfo, 16> MBBs;
171    SmallVector<TerminatorInfo, 16> Terminators;
172  };
173
174  char SystemZLongBranch::ID = 0;
175
176  const uint64_t MaxBackwardRange = 0x10000;
177  const uint64_t MaxForwardRange = 0xfffe;
178} // end of anonymous namespace
179
180FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
181  return new SystemZLongBranch(TM);
182}
183
184// Position describes the state immediately before Block.  Update Block
185// accordingly and move Position to the end of the block's non-terminator
186// instructions.
187void SystemZLongBranch::skipNonTerminators(BlockPosition &Position,
188                                           MBBInfo &Block) {
189  if (Block.Alignment > Position.KnownBits) {
190    // When calculating the address of Block, we need to conservatively
191    // assume that Block had the worst possible misalignment.
192    Position.Address += ((uint64_t(1) << Block.Alignment) -
193                         (uint64_t(1) << Position.KnownBits));
194    Position.KnownBits = Block.Alignment;
195  }
196
197  // Align the addresses.
198  uint64_t AlignMask = (uint64_t(1) << Block.Alignment) - 1;
199  Position.Address = (Position.Address + AlignMask) & ~AlignMask;
200
201  // Record the block's position.
202  Block.Address = Position.Address;
203
204  // Move past the non-terminators in the block.
205  Position.Address += Block.Size;
206}
207
208// Position describes the state immediately before Terminator.
209// Update Terminator accordingly and move Position past it.
210// Assume that Terminator will be relaxed if AssumeRelaxed.
211void SystemZLongBranch::skipTerminator(BlockPosition &Position,
212                                       TerminatorInfo &Terminator,
213                                       bool AssumeRelaxed) {
214  Terminator.Address = Position.Address;
215  Position.Address += Terminator.Size;
216  if (AssumeRelaxed)
217    Position.Address += Terminator.ExtraRelaxSize;
218}
219
220// Return a description of terminator instruction MI.
221TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
222  TerminatorInfo Terminator;
223  Terminator.Size = TII->getInstSizeInBytes(MI);
224  if (MI->isConditionalBranch() || MI->isUnconditionalBranch()) {
225    switch (MI->getOpcode()) {
226    case SystemZ::J:
227      // Relaxes to JG, which is 2 bytes longer.
228      Terminator.ExtraRelaxSize = 2;
229      break;
230    case SystemZ::BRC:
231      // Relaxes to BRCL, which is 2 bytes longer.
232      Terminator.ExtraRelaxSize = 2;
233      break;
234    case SystemZ::CRJ:
235      // Relaxes to a CR/BRCL sequence, which is 2 bytes longer.
236      Terminator.ExtraRelaxSize = 2;
237      break;
238    case SystemZ::CGRJ:
239      // Relaxes to a CGR/BRCL sequence, which is 4 bytes longer.
240      Terminator.ExtraRelaxSize = 4;
241      break;
242    case SystemZ::CIJ:
243    case SystemZ::CGIJ:
244      // Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer.
245      Terminator.ExtraRelaxSize = 4;
246      break;
247    default:
248      llvm_unreachable("Unrecognized branch instruction");
249    }
250    Terminator.Branch = MI;
251    Terminator.TargetBlock =
252      TII->getBranchInfo(MI).Target->getMBB()->getNumber();
253  }
254  return Terminator;
255}
256
257// Return true if CC is live after MBBI.
258static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI,
259                          const TargetRegisterInfo *TRI) {
260  if (MBBI->killsRegister(SystemZ::CC, TRI))
261    return false;
262
263  MachineBasicBlock *MBB = MBBI->getParent();
264  MachineBasicBlock::iterator MBBE = MBB->end();
265  for (++MBBI; MBBI != MBBE; ++MBBI) {
266    if (MBBI->readsRegister(SystemZ::CC, TRI))
267      return true;
268    if (MBBI->definesRegister(SystemZ::CC, TRI))
269      return false;
270  }
271
272  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
273         SE = MBB->succ_end(); SI != SE; ++SI)
274    if ((*SI)->isLiveIn(SystemZ::CC))
275      return true;
276
277  return false;
278}
279
280// Try to fuse compare instruction Compare into a later branch.  Return
281// true on success and if Compare is therefore redundant.
282bool SystemZLongBranch::fuseCompareAndBranch(MachineInstr *Compare) {
283  if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
284    return false;
285
286  unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(),
287                                                  Compare);
288  if (!FusedOpcode)
289    return false;
290
291  unsigned SrcReg = Compare->getOperand(0).getReg();
292  unsigned SrcReg2 = (Compare->getOperand(1).isReg() ?
293                      Compare->getOperand(1).getReg() : 0);
294  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
295  MachineBasicBlock *MBB = Compare->getParent();
296  MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->end();
297  for (++MBBI; MBBI != MBBE; ++MBBI) {
298    if (MBBI->getOpcode() == SystemZ::BRC && !isCCLiveAfter(MBBI, TRI)) {
299      // Read the branch mask and target.
300      MachineOperand CCMask(MBBI->getOperand(1));
301      MachineOperand Target(MBBI->getOperand(2));
302      assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 &&
303             "Invalid condition-code mask for integer comparison");
304
305      // Clear out all current operands.
306      int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
307      assert(CCUse >= 0 && "BRC must use CC");
308      MBBI->RemoveOperand(CCUse);
309      MBBI->RemoveOperand(2);
310      MBBI->RemoveOperand(1);
311      MBBI->RemoveOperand(0);
312
313      // Rebuild MBBI as a fused compare and branch.
314      MBBI->setDesc(TII->get(FusedOpcode));
315      MachineInstrBuilder(*MBB->getParent(), MBBI)
316        .addOperand(Compare->getOperand(0))
317        .addOperand(Compare->getOperand(1))
318        .addOperand(CCMask)
319        .addOperand(Target);
320
321      // Clear any intervening kills of SrcReg and SrcReg2.
322      MBBI = Compare;
323      for (++MBBI; MBBI != MBBE; ++MBBI) {
324        MBBI->clearRegisterKills(SrcReg, TRI);
325        if (SrcReg2)
326          MBBI->clearRegisterKills(SrcReg2, TRI);
327      }
328      return true;
329    }
330
331    // Stop if we find another reference to CC before a branch.
332    if (MBBI->readsRegister(SystemZ::CC, TRI) ||
333        MBBI->modifiesRegister(SystemZ::CC, TRI))
334      return false;
335
336    // Stop if we find another assignment to the registers before the branch.
337    if (MBBI->modifiesRegister(SrcReg, TRI) ||
338        (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI)))
339      return false;
340  }
341  return false;
342}
343
344// Fill MBBs and Terminators, setting the addresses on the assumption
345// that no branches need relaxation.  Return the size of the function under
346// this assumption.
347uint64_t SystemZLongBranch::initMBBInfo() {
348  MF->RenumberBlocks();
349  unsigned NumBlocks = MF->size();
350
351  MBBs.clear();
352  MBBs.resize(NumBlocks);
353
354  Terminators.clear();
355  Terminators.reserve(NumBlocks);
356
357  BlockPosition Position(MF->getAlignment());
358  for (unsigned I = 0; I < NumBlocks; ++I) {
359    MachineBasicBlock *MBB = MF->getBlockNumbered(I);
360    MBBInfo &Block = MBBs[I];
361
362    // Record the alignment, for quick access.
363    Block.Alignment = MBB->getAlignment();
364
365    // Calculate the size of the fixed part of the block.
366    MachineBasicBlock::iterator MI = MBB->begin();
367    MachineBasicBlock::iterator End = MBB->end();
368    while (MI != End && !MI->isTerminator()) {
369      MachineInstr *Current = MI;
370      ++MI;
371      if (Current->isCompare() && fuseCompareAndBranch(Current))
372        Current->removeFromParent();
373      else
374        Block.Size += TII->getInstSizeInBytes(Current);
375    }
376    skipNonTerminators(Position, Block);
377
378    // Add the terminators.
379    while (MI != End) {
380      if (!MI->isDebugValue()) {
381        assert(MI->isTerminator() && "Terminator followed by non-terminator");
382        Terminators.push_back(describeTerminator(MI));
383        skipTerminator(Position, Terminators.back(), false);
384        ++Block.NumTerminators;
385      }
386      ++MI;
387    }
388  }
389
390  return Position.Address;
391}
392
393// Return true if, under current assumptions, Terminator would need to be
394// relaxed if it were placed at address Address.
395bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator,
396                                        uint64_t Address) {
397  if (!Terminator.Branch)
398    return false;
399
400  const MBBInfo &Target = MBBs[Terminator.TargetBlock];
401  if (Address >= Target.Address) {
402    if (Address - Target.Address <= MaxBackwardRange)
403      return false;
404  } else {
405    if (Target.Address - Address <= MaxForwardRange)
406      return false;
407  }
408
409  return true;
410}
411
412// Return true if, under current assumptions, any terminator needs
413// to be relaxed.
414bool SystemZLongBranch::mustRelaxABranch() {
415  for (SmallVectorImpl<TerminatorInfo>::iterator TI = Terminators.begin(),
416         TE = Terminators.end(); TI != TE; ++TI)
417    if (mustRelaxBranch(*TI, TI->Address))
418      return true;
419  return false;
420}
421
422// Set the address of each block on the assumption that all branches
423// must be long.
424void SystemZLongBranch::setWorstCaseAddresses() {
425  SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
426  BlockPosition Position(MF->getAlignment());
427  for (SmallVectorImpl<MBBInfo>::iterator BI = MBBs.begin(), BE = MBBs.end();
428       BI != BE; ++BI) {
429    skipNonTerminators(Position, *BI);
430    for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) {
431      skipTerminator(Position, *TI, true);
432      ++TI;
433    }
434  }
435}
436
437// Split MI into the comparison given by CompareOpcode followed
438// a BRCL on the result.
439void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
440                                           unsigned CompareOpcode) {
441  MachineBasicBlock *MBB = MI->getParent();
442  DebugLoc DL = MI->getDebugLoc();
443  BuildMI(*MBB, MI, DL, TII->get(CompareOpcode))
444    .addOperand(MI->getOperand(0))
445    .addOperand(MI->getOperand(1));
446  MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
447    .addImm(SystemZ::CCMASK_ICMP)
448    .addOperand(MI->getOperand(2))
449    .addOperand(MI->getOperand(3));
450  // The implicit use of CC is a killing use.
451  BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
452  MI->eraseFromParent();
453}
454
455// Relax the branch described by Terminator.
456void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
457  MachineInstr *Branch = Terminator.Branch;
458  switch (Branch->getOpcode()) {
459  case SystemZ::J:
460    Branch->setDesc(TII->get(SystemZ::JG));
461    break;
462  case SystemZ::BRC:
463    Branch->setDesc(TII->get(SystemZ::BRCL));
464    break;
465  case SystemZ::CRJ:
466    splitCompareBranch(Branch, SystemZ::CR);
467    break;
468  case SystemZ::CGRJ:
469    splitCompareBranch(Branch, SystemZ::CGR);
470    break;
471  case SystemZ::CIJ:
472    splitCompareBranch(Branch, SystemZ::CHI);
473    break;
474  case SystemZ::CGIJ:
475    splitCompareBranch(Branch, SystemZ::CGHI);
476    break;
477  default:
478    llvm_unreachable("Unrecognized branch");
479  }
480
481  Terminator.Size += Terminator.ExtraRelaxSize;
482  Terminator.ExtraRelaxSize = 0;
483  Terminator.Branch = 0;
484
485  ++LongBranches;
486}
487
488// Run a shortening pass and relax any branches that need to be relaxed.
489void SystemZLongBranch::relaxBranches() {
490  SmallVector<TerminatorInfo, 16>::iterator TI = Terminators.begin();
491  BlockPosition Position(MF->getAlignment());
492  for (SmallVectorImpl<MBBInfo>::iterator BI = MBBs.begin(), BE = MBBs.end();
493       BI != BE; ++BI) {
494    skipNonTerminators(Position, *BI);
495    for (unsigned BTI = 0, BTE = BI->NumTerminators; BTI != BTE; ++BTI) {
496      assert(Position.Address <= TI->Address &&
497             "Addresses shouldn't go forwards");
498      if (mustRelaxBranch(*TI, Position.Address))
499        relaxBranch(*TI);
500      skipTerminator(Position, *TI, false);
501      ++TI;
502    }
503  }
504}
505
506bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
507  TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
508  MF = &F;
509  uint64_t Size = initMBBInfo();
510  if (Size <= MaxForwardRange || !mustRelaxABranch())
511    return false;
512
513  setWorstCaseAddresses();
514  relaxBranches();
515  return true;
516}
517