ARMConstantIslandPass.cpp revision 7a4c071cd9f67599eba21e902079d0e85f2abf97
//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that splits the constant pool up into 'islands'
// which are scattered throughout the function.  This is required due to the
// limited pc-relative displacements that ARM has.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NumCPEs,       "Number of constpool entries");
STATISTIC(NumSplit,      "Number of uncond branches inserted");
STATISTIC(NumCBrFixed,   "Number of cond branches fixed");
STATISTIC(NumUBrFixed,   "Number of uncond branches fixed");
STATISTIC(NumTBs,        "Number of table branches generated");
STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
STATISTIC(NumCBZ,        "Number of CBZ / CBNZ formed");
STATISTIC(NumJTMoved,    "Number of jump table destination blocks moved");
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");


static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
          cl::desc("Adjust basic block layout to better use TB[BH]"));

// FIXME: This option should be removed once it has received sufficient testing.
static cl::opt<bool>
AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
          cl::desc("Align constant islands in code"));

/// UnknownPadding - Return the worst case padding that could result from
/// unknown offset bits.  This does not include alignment padding caused by
/// known offset bits.
///
/// @param LogAlign log2(alignment)
/// @param KnownBits Number of known low offset bits.
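///
/// For example, with LogAlign = 3 and KnownBits = 1 the offset is only known
/// modulo 2, so the unknown bits could force up to
/// (1 << 3) - (1 << 1) = 6 bytes of extra padding.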
static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits < LogAlign)
    return (1u << LogAlign) - (1u << KnownBits);
  return 0;
}

/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
/// add padding such that:
///
/// 1. The result is aligned to 1 << LogAlign.
///
/// 2. No other value of the unknown bits would require more padding.
///
/// This may add more padding than is required to satisfy just one of the
/// constraints.  It is necessary to compute alignment this way to guarantee
/// that we don't underestimate the padding before an aligned block.  If the
/// real padding before a block is larger than we think, constant pool entries
/// may go out of range.
static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
                                      unsigned KnownBits) {
  // Add the worst possible padding that the unknown bits could cause.
  Offset += UnknownPadding(LogAlign, KnownBits);

  // Then align the result.
  return RoundUpToAlignment(Offset, 1u << LogAlign);
}

namespace {
  /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
  /// requires constant pool entries to be scattered among the instructions
  /// inside a function.  To do this, it completely ignores the normal LLVM
  /// constant pool; instead, it places constants wherever it feels like with
  /// special instructions.
  ///
  /// The terminology used in this pass includes:
  ///   Islands - Clumps of constants placed in the function.
  ///   Water   - Potential places where an island could be formed.
  ///   CPE     - A constant pool entry that has been placed somewhere, which
  ///             tracks a list of users.
  class ARMConstantIslands : public MachineFunctionPass {
    /// BasicBlockInfo - Information about the offset and size of a single
    /// basic block.
    struct BasicBlockInfo {
      /// Offset - Distance from the beginning of the function to the beginning
      /// of this basic block.
      ///
      /// The offset is always aligned as required by the basic block.
      unsigned Offset;

      /// Size - Size of the basic block in bytes.  If the block contains
      /// inline assembly, this is a worst case estimate.
      ///
      /// The size does not include any alignment padding whether from the
      /// beginning of the block, or from an aligned jump table at the end.
      unsigned Size;

      /// KnownBits - The number of low bits in Offset that are known to be
      /// exact.  The remaining bits of Offset are an upper bound.
      uint8_t KnownBits;

      /// Unalign - When non-zero, the block contains instructions (inline asm)
      /// of unknown size.  The real size may be smaller than Size bytes by a
      /// multiple of 1 << Unalign.
      uint8_t Unalign;

      /// PostAlign - When non-zero, the block terminator contains a .align
      /// directive, so the end of the block is aligned to 1 << PostAlign
      /// bytes.
      uint8_t PostAlign;

      BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
        PostAlign(0) {}

      /// Compute the number of known offset bits internally to this block.
      /// This number should be used to predict worst case padding when
      /// splitting the block.
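      ///
      /// For example, a Thumb block containing inline asm gets Unalign = 1,
      /// so only the low bit of any offset inside it can be trusted.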
      unsigned internalKnownBits() const {
        return Unalign ? Unalign : KnownBits;
      }

      /// Compute the offset immediately following this block.  If LogAlign is
      /// specified, return the offset the successor block will get if it has
      /// this alignment.
      unsigned postOffset(unsigned LogAlign = 0) const {
        unsigned PO = Offset + Size;
        unsigned LA = std::max(unsigned(PostAlign), LogAlign);
        if (!LA)
          return PO;
        // Add alignment padding from the terminator.
        return WorstCaseAlign(PO, LA, internalKnownBits());
      }

      /// Compute the number of known low bits of postOffset.  If this block
      /// contains inline asm, the number of known bits drops to the
      /// instruction alignment.  An aligned terminator may increase the number
      /// of known bits.
      /// If LogAlign is given, also consider the alignment of the next block.
      unsigned postKnownBits(unsigned LogAlign = 0) const {
        return std::max(std::max(unsigned(PostAlign), LogAlign),
                        internalKnownBits());
      }
    };

    std::vector<BasicBlockInfo> BBInfo;

    /// WaterList - A sorted list of basic blocks where islands could be placed
    /// (i.e. blocks that don't fall through to the following block, due
    /// to a return, unreachable, or unconditional branch).
    std::vector<MachineBasicBlock*> WaterList;

    /// NewWaterList - The subset of WaterList that was created since the
    /// previous iteration by inserting unconditional branches.
    SmallSet<MachineBasicBlock*, 4> NewWaterList;

    typedef std::vector<MachineBasicBlock*>::iterator water_iterator;

    /// CPUser - One user of a constant pool, keeping the machine instruction
    /// pointer, the constant pool being referenced, and the max displacement
    /// allowed from the instruction to the CP.  The HighWaterMark records the
    /// highest basic block where a new CPEntry can be placed.  To ensure this
    /// pass terminates, the CP entries are initially placed at the end of the
    /// function and then move monotonically to lower addresses.  The
    /// exception to this rule is when the current CP entry for a particular
    /// CPUser is out of range, but there is another CP entry for the same
    /// constant value in range.  We want to use the existing in-range CP
    /// entry, but if it later moves out of range, the search for new water
    /// should resume where it left off.  The HighWaterMark is used to record
    /// that point.
    struct CPUser {
      MachineInstr *MI;
      MachineInstr *CPEMI;
      MachineBasicBlock *HighWaterMark;
    private:
      unsigned MaxDisp;
    public:
      bool NegOk;
      bool IsSoImm;
      bool KnownAlignment;
      CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
             bool neg, bool soimm)
        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm),
          KnownAlignment(false) {
        HighWaterMark = CPEMI->getParent();
      }
      /// getMaxDisp - Returns the maximum displacement supported by MI.
      /// Correct for unknown alignment.
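      /// For example, a tLDRpci user with MaxDisp = 1020 is only allowed a
      /// displacement of 1018 until KnownAlignment has been established.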
      unsigned getMaxDisp() const {
        return KnownAlignment ? MaxDisp : MaxDisp - 2;
      }
    };

    /// CPUsers - Keep track of all of the machine instructions that use various
    /// constant pools and their max displacement.
    std::vector<CPUser> CPUsers;

    /// CPEntry - One per constant pool entry, keeping the machine instruction
    /// pointer, the constpool index, and the number of CPUser's which
    /// reference this entry.
    struct CPEntry {
      MachineInstr *CPEMI;
      unsigned CPI;
      unsigned RefCount;
      CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
        : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
    };

    /// CPEntries - Keep track of all of the constant pool entry machine
    /// instructions. For each original constpool index (i.e. those that
    /// existed upon entry to this pass), it keeps a vector of entries.
    /// Original elements are cloned as we go along; the clones are
    /// put in the vector of the original element, but have distinct CPIs.
    std::vector<std::vector<CPEntry> > CPEntries;

    /// ImmBranch - One per immediate branch, keeping the machine instruction
    /// pointer, conditional or unconditional, the max displacement,
    /// and (if isCond is true) the corresponding unconditional branch
    /// opcode.
    struct ImmBranch {
      MachineInstr *MI;
      unsigned MaxDisp : 31;
      bool isCond : 1;
      int UncondBr;
      ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
        : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
    };

    /// ImmBranches - Keep track of all the immediate branch instructions.
    ///
    std::vector<ImmBranch> ImmBranches;

    /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
    ///
    SmallVector<MachineInstr*, 4> PushPopMIs;

    /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
    SmallVector<MachineInstr*, 4> T2JumpTables;

    /// HasFarJump - True if any far jump instruction has been emitted during
    /// the branch fix up pass.
    bool HasFarJump;

    MachineFunction *MF;
    MachineConstantPool *MCP;
    const ARMBaseInstrInfo *TII;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    bool isThumb;
    bool isThumb1;
    bool isThumb2;
  public:
    static char ID;
    ARMConstantIslands() : MachineFunctionPass(ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF);

    virtual const char *getPassName() const {
      return "ARM constant island placement and branch shortening pass";
    }

  private:
    void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
    CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
    unsigned getCPELogAlign(const MachineInstr *CPEMI);
    void scanFunctionJumpTables();
    void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
    MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
    void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
    void adjustBBOffsetsAfter(MachineBasicBlock *BB);
    bool decrementCPEReferenceCount(unsigned CPI, MachineInstr *CPEMI);
    int findInRangeCPEntry(CPUser &U, unsigned UserOffset);
    bool findAvailableWater(CPUser &U, unsigned UserOffset,
                            water_iterator &WaterIter);
    void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
                        MachineBasicBlock *&NewMBB);
    bool handleConstantPoolUser(unsigned CPUserIndex);
    void removeDeadCPEMI(MachineInstr *CPEMI);
    bool removeUnusedCPEntries();
    bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
                          MachineInstr *CPEMI, unsigned Disp, bool NegOk,
                          bool DoDump = false);
    bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
                        CPUser &U, unsigned &Growth);
    bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
    bool fixupImmediateBr(ImmBranch &Br);
    bool fixupConditionalBr(ImmBranch &Br);
    bool fixupUnconditionalBr(ImmBranch &Br);
    bool undoLRSpillRestore();
    bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const;
    bool optimizeThumb2Instructions();
    bool optimizeThumb2Branches();
    bool reorderThumb2JumpTables();
    bool optimizeThumb2JumpTables();
    MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
                                                  MachineBasicBlock *JTBB);

    void computeBlockSize(MachineBasicBlock *MBB);
    unsigned getOffsetOf(MachineInstr *MI) const;
    unsigned getUserOffset(CPUser&) const;
    void dumpBBs();
    void verify();

    bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
                         unsigned Disp, bool NegativeOK, bool IsSoImm = false);
    bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
                         const CPUser &U) {
      return isOffsetInRange(UserOffset, TrialOffset,
                             U.getMaxDisp(), U.NegOk, U.IsSoImm);
    }
  };
  char ARMConstantIslands::ID = 0;
}

/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
#ifndef NDEBUG
  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
       MBBI != E; ++MBBI) {
    MachineBasicBlock *MBB = MBBI;
    unsigned Align = MBB->getAlignment();
    unsigned MBBId = MBB->getNumber();
    assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
    assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
  }
  for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
    CPUser &U = CPUsers[i];
    unsigned UserOffset = getUserOffset(U);
    assert(isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp(),
                            U.NegOk) && "Constant pool entry out of range!");
  }
#endif
}

/// print block size and offset information - debugging
void ARMConstantIslands::dumpBBs() {
  DEBUG({
    for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
      const BasicBlockInfo &BBI = BBInfo[J];
      dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
             << " kb=" << unsigned(BBI.KnownBits)
             << " ua=" << unsigned(BBI.Unalign)
             << " pa=" << unsigned(BBI.PostAlign)
             << format(" size=%#x\n", BBInfo[J].Size);
    }
  });
}

/// createARMConstantIslandPass - returns an instance of the constpool
/// island pass.
FunctionPass *llvm::createARMConstantIslandPass() {
  return new ARMConstantIslands();
}

bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  MCP = mf.getConstantPool();

  DEBUG(dbgs() << "***** ARMConstantIslands: "
               << MCP->getConstants().size() << " CP entries, aligned to "
               << MCP->getConstantPoolAlignment() << " bytes *****\n");

  TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo();
  AFI = MF->getInfo<ARMFunctionInfo>();
  STI = &MF->getTarget().getSubtarget<ARMSubtarget>();

  isThumb = AFI->isThumbFunction();
  isThumb1 = AFI->isThumb1OnlyFunction();
  isThumb2 = AFI->isThumb2Function();

  HasFarJump = false;

  // This pass invalidates liveness information when it splits basic blocks.
  MF->getRegInfo().invalidateLiveness();

  // Renumber all of the machine basic blocks in the function, guaranteeing
  // that the numbers agree with the position of the block in the function.
  MF->RenumberBlocks();

  // Try to reorder and otherwise adjust the block layout to make good use
  // of the TB[BH] instructions.
  bool MadeChange = false;
  if (isThumb2 && AdjustJumpTableBlocks) {
    scanFunctionJumpTables();
    MadeChange |= reorderThumb2JumpTables();
    // Data is out of date, so clear it. It'll be re-computed later.
    T2JumpTables.clear();
    // Blocks may have shifted around. Keep the numbering up to date.
    MF->RenumberBlocks();
  }

  // Thumb1 functions containing constant pools get 4-byte alignment.
  // This is so we can keep exact track of where the alignment padding goes.

  // ARM and Thumb2 functions need to be 4-byte aligned.
  if (!isThumb1)
    MF->EnsureAlignment(2);  // 2 = log2(4)

  // Perform the initial placement of the constant pool entries.  To start
  // with, we put them all at the end of the function.
  std::vector<MachineInstr*> CPEMIs;
  if (!MCP->isEmpty())
    doInitialPlacement(CPEMIs);

  // The next UID to take is the first unused one.
  AFI->initPICLabelUId(CPEMIs.size());

  // Do the initial scan of the function, building up information about the
  // sizes of each block, the location of all the water, and finding all of the
  // constant pool users.
  initializeFunctionInfo(CPEMIs);
  CPEMIs.clear();
  DEBUG(dumpBBs());

  // Remove dead constant pool entries.
  MadeChange |= removeUnusedCPEntries();

  // Iteratively place constant pool entries and fix up branches until there
  // is no change.
  unsigned NoCPIters = 0, NoBRIters = 0;
  while (true) {
    DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
    bool CPChange = false;
    for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
      CPChange |= handleConstantPoolUser(i);
    if (CPChange && ++NoCPIters > 30)
      report_fatal_error("Constant Island pass failed to converge!");
    DEBUG(dumpBBs());

    // Clear NewWaterList now.  If we split a block for branches, it should
    // appear as "new water" for the next iteration of constant pool placement.
    NewWaterList.clear();

    DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
    bool BRChange = false;
    for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
      BRChange |= fixupImmediateBr(ImmBranches[i]);
    if (BRChange && ++NoBRIters > 30)
      report_fatal_error("Branch Fix Up pass failed to converge!");
    DEBUG(dumpBBs());

    if (!CPChange && !BRChange)
      break;
    MadeChange = true;
  }

  // Shrink 32-bit Thumb2 branch, load, and store instructions.
  if (isThumb2 && !STI->prefers32BitThumb())
    MadeChange |= optimizeThumb2Instructions();

  // After a while, this might be made debug-only, but it is not expensive.
  verify();

  // If LR has been force-spilled and no far jump (i.e. BL) has been issued,
  // undo the spill / restore of LR if possible.
  if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
    MadeChange |= undoLRSpillRestore();

  // Save the mapping between original and cloned constpool entries.
  for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
    for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
      const CPEntry &CPE = CPEntries[i][j];
      AFI->recordCPEClone(i, CPE.CPI);
    }
  }

  DEBUG(dbgs() << '\n'; dumpBBs());

  BBInfo.clear();
  WaterList.clear();
  CPUsers.clear();
  CPEntries.clear();
  ImmBranches.clear();
  PushPopMIs.clear();
  T2JumpTables.clear();

  return MadeChange;
}

/// doInitialPlacement - Perform the initial placement of the constant pool
/// entries.  To start with, we put them all at the end of the function.
void
ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
  // Create the basic block to hold the CPE's.
  MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
  MF->push_back(BB);

  // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
  unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());

  // Mark the basic block as required by the const-pool.
  // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
  BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);

  // The function needs to be as aligned as the basic blocks. The linker may
  // move functions around based on their alignment.
  MF->EnsureAlignment(BB->getAlignment());

  // Order the entries in BB by descending alignment.  That ensures correct
  // alignment of all entries as long as BB is sufficiently aligned.  Keep
  // track of the insertion point for each alignment.  We are going to bucket
  // sort the entries as they are created.
  SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());

  // Add all of the constants from the constant pool to the end block, use an
  // identity mapping of CPI's to CPE's.
  const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();

  const TargetData &TD = *MF->getTarget().getTargetData();
  for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
    unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
    assert(Size >= 4 && "Too small constant pool entry");
    unsigned Align = CPs[i].getAlignment();
    assert(isPowerOf2_32(Align) && "Invalid alignment");
    // Verify that all constant pool entries are a multiple of their alignment.
    // If not, we would have to pad them out so that instructions stay aligned.
    assert((Size % Align) == 0 && "CP Entry not multiple of its alignment!");

    // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
    unsigned LogAlign = Log2_32(Align);
    MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
    MachineInstr *CPEMI =
      BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
        .addImm(i).addConstantPoolIndex(i).addImm(Size);
    CPEMIs.push_back(CPEMI);

    // Ensure that future entries with higher alignment get inserted before
    // CPEMI. This is bucket sort with iterators.
    for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
      if (InsPoint[a] == InsAt)
        InsPoint[a] = CPEMI;

    // Add a new CPEntry, but no corresponding CPUser yet.
    std::vector<CPEntry> CPEs;
    CPEs.push_back(CPEntry(CPEMI, i));
    CPEntries.push_back(CPEs);
    ++NumCPEs;
    DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
                 << Size << ", align = " << Align << '\n');
  }
  DEBUG(BB->dump());
}

/// BBHasFallthrough - Return true if the specified basic block can fallthrough
/// into the block immediately after it.
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
  // Get the next machine basic block in the function.
  MachineFunction::iterator MBBI = MBB;
  // Can't fall off end of function.
  if (llvm::next(MBBI) == MBB->getParent()->end())
    return false;

  MachineBasicBlock *NextBB = llvm::next(MBBI);
  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
       E = MBB->succ_end(); I != E; ++I)
    if (*I == NextBB)
      return true;

  return false;
}

/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
/// look up the corresponding CPEntry.
ARMConstantIslands::CPEntry
*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
                                        const MachineInstr *CPEMI) {
  std::vector<CPEntry> &CPEs = CPEntries[CPI];
  // Number of entries per constpool index should be small, just do a
  // linear search.
  for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
    if (CPEs[i].CPEMI == CPEMI)
      return &CPEs[i];
  }
  return NULL;
}

/// getCPELogAlign - Returns the required alignment of the constant pool entry
/// represented by CPEMI.  Alignment is measured in log2(bytes) units.
unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
  assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);

  // Everything is 4-byte aligned unless AlignConstantIslands is set.
  if (!AlignConstantIslands)
    return 2;

  unsigned CPI = CPEMI->getOperand(1).getIndex();
  assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
  unsigned Align = MCP->getConstants()[CPI].getAlignment();
  assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
  return Log2_32(Align);
}

/// scanFunctionJumpTables - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
void ARMConstantIslands::scanFunctionJumpTables() {
  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
       MBBI != E; ++MBBI) {
    MachineBasicBlock &MBB = *MBBI;

    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I)
      if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
        T2JumpTables.push_back(I);
  }
}

/// initializeFunctionInfo - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
void ARMConstantIslands::
initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
  BBInfo.clear();
  BBInfo.resize(MF->getNumBlockIDs());

  // First thing, compute the size of all basic blocks, and see if the function
  // has any inline assembly in it. If so, we have to be conservative about
  // alignment assumptions, as we don't know for sure the size of any
  // instructions in the inline assembly.
  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
    computeBlockSize(I);

  // The known bits of the entry block offset are determined by the function
  // alignment.
  BBInfo.front().KnownBits = MF->getAlignment();

  // Compute block offsets and known bits.
  adjustBBOffsetsAfter(MF->begin());

  // Now go back through the instructions and build up our data structures.
  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
       MBBI != E; ++MBBI) {
    MachineBasicBlock &MBB = *MBBI;

    // If this block doesn't fall through into the next MBB, then this is
    // 'water' where a constant pool island could be placed.
    if (!BBHasFallthrough(&MBB))
      WaterList.push_back(&MBB);

    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      if (I->isDebugValue())
        continue;

      int Opc = I->getOpcode();
      if (I->isBranch()) {
        bool isCond = false;
        unsigned Bits = 0;
        unsigned Scale = 1;
        int UOpc = Opc;
        switch (Opc) {
        default:
          continue;  // Ignore other JT branches
        case ARM::t2BR_JT:
          T2JumpTables.push_back(I);
          continue;   // Does not get an entry in ImmBranches
        case ARM::Bcc:
          isCond = true;
          UOpc = ARM::B;
          // Fallthrough
        case ARM::B:
          Bits = 24;
          Scale = 4;
          break;
        case ARM::tBcc:
          isCond = true;
          UOpc = ARM::tB;
          Bits = 8;
          Scale = 2;
          break;
        case ARM::tB:
          Bits = 11;
          Scale = 2;
          break;
        case ARM::t2Bcc:
          isCond = true;
          UOpc = ARM::t2B;
          Bits = 20;
          Scale = 2;
          break;
        case ARM::t2B:
          Bits = 24;
          Scale = 2;
          break;
        }

        // Record this immediate branch.
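        // e.g. ARM::B has Bits = 24 and Scale = 4, so MaxOffs below is
        // ((1 << 23) - 1) * 4, roughly +/-32MB of reach.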
        unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
        ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
      }

      if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
        PushPopMIs.push_back(I);

      if (Opc == ARM::CONSTPOOL_ENTRY)
        continue;

      // Scan the instructions for constant pool operands.
      for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
        if (I->getOperand(op).isCPI()) {
          // We found one.  The addressing mode tells us the max displacement
          // from the PC that this instruction permits.

          // Basic size info comes from the TSFlags field.
          unsigned Bits = 0;
          unsigned Scale = 1;
          bool NegOk = false;
          bool IsSoImm = false;

          switch (Opc) {
          default:
            llvm_unreachable("Unknown addressing mode for CP reference!");

          // Taking the address of a CP entry.
          case ARM::LEApcrel:
            // This takes a SoImm, which is an 8-bit rotated immediate.  We'll
            // pretend the maximum offset is 255 * 4.  Since each instruction
            // is 4 bytes wide, this is always correct.  We'll check for other
            // displacements that fit in a SoImm as well.
            Bits = 8;
            Scale = 4;
            NegOk = true;
            IsSoImm = true;
            break;
          case ARM::t2LEApcrel:
            Bits = 12;
            NegOk = true;
            break;
          case ARM::tLEApcrel:
            Bits = 8;
            Scale = 4;
            break;

          case ARM::LDRi12:
          case ARM::LDRcp:
          case ARM::t2LDRpci:
            Bits = 12;  // +-offset_12
            NegOk = true;
            break;

          case ARM::tLDRpci:
            Bits = 8;
            Scale = 4;  // +(offset_8*4)
            break;

          case ARM::VLDRD:
          case ARM::VLDRS:
            Bits = 8;
            Scale = 4;  // +-(offset_8*4)
            NegOk = true;
            break;
          }

          // Remember that this is a user of a CP entry.
          unsigned CPI = I->getOperand(op).getIndex();
          MachineInstr *CPEMI = CPEMIs[CPI];
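          // e.g. tLDRpci has Bits = 8 and Scale = 4, so MaxOffs below is
          // 255 * 4 = 1020 bytes.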
          unsigned MaxOffs = ((1 << Bits)-1) * Scale;
          CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));

          // Increment corresponding CPEntry reference count.
          CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
          assert(CPE && "Cannot find a corresponding CPEntry!");
          CPE->RefCount++;

          // Instructions can only use one CP entry, don't bother scanning the
          // rest of the operands.
          break;
        }
    }
  }
}

/// computeBlockSize - Compute the size and some alignment information for MBB.
/// This function updates BBInfo directly.
void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
  BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
  BBI.Size = 0;
  BBI.Unalign = 0;
  BBI.PostAlign = 0;

  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
       ++I) {
    BBI.Size += TII->GetInstSizeInBytes(I);
    // For inline asm, GetInstSizeInBytes returns a conservative estimate.
    // The actual size may be smaller, but still a multiple of the instr size.
    if (I->isInlineAsm())
      BBI.Unalign = isThumb ? 1 : 2;
    // Also consider instructions that may be shrunk later.
    else if (isThumb && mayOptimizeThumb2Instruction(I))
      BBI.Unalign = 1;
  }

  // tBR_JTr contains a .align 2 directive.
  if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
    BBI.PostAlign = 2;
    MBB->getParent()->EnsureAlignment(2);
  }
}

/// getOffsetOf - Return the current offset of the specified machine instruction
/// from the start of the function.  This offset changes as stuff is moved
/// around inside the function.
unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
  MachineBasicBlock *MBB = MI->getParent();

  // The offset is composed of two things: the sum of the sizes of all MBB's
  // before this instruction's block, and the offset from the start of the block
  // it is in.
  unsigned Offset = BBInfo[MBB->getNumber()].Offset;

  // Sum instructions before MI in MBB.
  for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
    Offset += TII->GetInstSizeInBytes(I);
  }
  return Offset;
}

/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
/// ID.
static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
                              const MachineBasicBlock *RHS) {
  return LHS->getNumber() < RHS->getNumber();
}

/// updateForInsertedWaterBlock - When a block is newly inserted into the
/// machine function, it upsets all of the block numbers.  Renumber the blocks
/// and update the arrays that parallel this numbering.
void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
  // Renumber the MBB's to keep them consecutive.
  NewBB->getParent()->RenumberBlocks(NewBB);

  // Insert an entry into BBInfo to align it properly with the (newly
  // renumbered) block numbers.
  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());

  // Next, update WaterList.  Specifically, we need to add NewBB as having
  // available water after it.
  water_iterator IP =
    std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
                     CompareMBBNumbers);
  WaterList.insert(IP, NewBB);
}


/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch.  Update data structures and renumber blocks to
/// account for this change, and return the newly created block.
MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
  MachineBasicBlock *OrigBB = MI->getParent();

  // Create a new MBB for the code after the OrigBB.
  MachineBasicBlock *NewBB =
    MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
  MachineFunction::iterator MBBI = OrigBB; ++MBBI;
  MF->insert(MBBI, NewBB);

  // Splice the instructions starting with MI over to NewBB.
  NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());

  // Add an unconditional branch from OrigBB to NewBB.
  // Note the new unconditional branch is not being recorded.
  // There doesn't seem to be meaningful DebugInfo available; this doesn't
  // correspond to anything in the source.
  unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
  if (!isThumb)
    BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
  else
    BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB)
            .addImm(ARMCC::AL).addReg(0);
  ++NumSplit;

  // Update the CFG.  All succs of OrigBB are now succs of NewBB.
  NewBB->transferSuccessors(OrigBB);

  // OrigBB branches to NewBB.
  OrigBB->addSuccessor(NewBB);

  // Update internal data structures to account for the newly inserted MBB.
  // This is almost the same as updateForInsertedWaterBlock, except that
  // the Water goes after OrigBB, not NewBB.
  MF->RenumberBlocks(NewBB);

  // Insert an entry into BBInfo to align it properly with the (newly
  // renumbered) block numbers.
  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());

  // Next, update WaterList.  Specifically, we need to add OrigBB as having
  // available water after it (but not if it's already there, which happens
  // when splitting before a conditional branch that is followed by an
  // unconditional branch - in that case we want to insert NewBB).
  water_iterator IP =
    std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
                     CompareMBBNumbers);
  MachineBasicBlock *WaterBB = *IP;
  if (WaterBB == OrigBB)
    WaterList.insert(llvm::next(IP), NewBB);
  else
    WaterList.insert(IP, OrigBB);
  NewWaterList.insert(OrigBB);

  // Figure out how large the OrigBB is.  As the first half of the original
  // block, it cannot contain a tablejump.  The size includes
  // the new jump we added.  (It should be possible to do this without
  // recounting everything, but it's very confusing, and this is rarely
  // executed.)
  computeBlockSize(OrigBB);

  // Figure out how large the NewBB is.  As the second half of the original
  // block, it may contain a tablejump.
  computeBlockSize(NewBB);

  // All BBOffsets following these blocks must be modified.
  adjustBBOffsetsAfter(OrigBB);

  return NewBB;
}

/// getUserOffset - Compute the offset of U.MI as seen by the hardware
/// displacement computation.  Update U.KnownAlignment to match its current
/// basic block location.
unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
  unsigned UserOffset = getOffsetOf(U.MI);
  const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
  unsigned KnownBits = BBI.internalKnownBits();

  // The value read from PC is offset from the actual instruction address.
  UserOffset += (isThumb ? 4 : 8);
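  // (A Thumb PC reads as the instruction address plus 4; an ARM PC as the
  // instruction address plus 8.)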

  // Because of inline assembly, we may not know the alignment (mod 4) of U.MI.
  // Make sure U.getMaxDisp() returns a constrained range.
  U.KnownAlignment = (KnownBits >= 2);

  // On Thumb, offsets == 2 mod 4 are rounded down by the hardware for
  // purposes of the displacement computation; compensate for that here.
  // For unknown alignments, getMaxDisp() constrains the range instead.
  if (isThumb && U.KnownAlignment)
    UserOffset &= ~3u;

  return UserOffset;
}

/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
/// reference) is within MaxDisp of TrialOffset (a proposed location of a
/// constant pool entry).
/// UserOffset is computed by getUserOffset above to include PC adjustments. If
/// the mod 4 alignment of UserOffset is not known, the uncertainty must be
/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
                                         unsigned TrialOffset, unsigned MaxDisp,
                                         bool NegativeOK, bool IsSoImm) {
  if (UserOffset <= TrialOffset) {
    // User before the Trial.
    if (TrialOffset - UserOffset <= MaxDisp)
      return true;
    // FIXME: Make use of the full range of soimm values.
  } else if (NegativeOK) {
    if (UserOffset - TrialOffset <= MaxDisp)
      return true;
    // FIXME: Make use of the full range of soimm values.
  }
  return false;
}

/// isWaterInRange - Returns true if a CPE placed after the specified
/// Water (a basic block) will be in range for the specific MI.
///
/// Compute how much the function will grow by inserting a CPE after Water.
bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
                                        MachineBasicBlock *Water, CPUser &U,
                                        unsigned &Growth) {
  unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
  unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
  unsigned NextBlockOffset, NextBlockAlignment;
  MachineFunction::const_iterator NextBlock = Water;
  if (++NextBlock == MF->end()) {
    NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
    NextBlockAlignment = 0;
  } else {
    NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
    NextBlockAlignment = NextBlock->getAlignment();
  }
  unsigned Size = U.CPEMI->getOperand(2).getImm();
  unsigned CPEEnd = CPEOffset + Size;

  // The CPE may be able to hide in the alignment padding before the next
  // block. It may also cause more padding to be required if it is more aligned
  // than the next block.
  if (CPEEnd > NextBlockOffset) {
    Growth = CPEEnd - NextBlockOffset;
    // Compute the padding that would go at the end of the CPE to align the next
    // block.
    Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);

    // If the CPE is to be inserted before the instruction, that will raise
    // the offset of the instruction. Also account for unknown alignment padding
    // in blocks between CPE and the user.
    if (CPEOffset < UserOffset)
      UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
  } else
    // CPE fits in existing padding.
    Growth = 0;

  return isOffsetInRange(UserOffset, CPEOffset, U);
}

/// isCPEntryInRange - Returns true if the distance between the specified MI
/// and the specified ConstPool entry instruction can fit in MI's displacement
/// field.
bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
                                      MachineInstr *CPEMI, unsigned MaxDisp,
                                      bool NegOk, bool DoDump) {
  unsigned CPEOffset  = getOffsetOf(CPEMI);
  assert(CPEOffset % 4 == 0 && "Misaligned CPE");

  if (DoDump) {
    DEBUG({
      unsigned Block = MI->getParent()->getNumber();
      const BasicBlockInfo &BBI = BBInfo[Block];
      dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
             << " max delta=" << MaxDisp
             << format(" insn address=%#x", UserOffset)
             << " in BB#" << Block << ": "
             << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
             << format("CPE address=%#x offset=%+d: ", CPEOffset,
                       int(CPEOffset-UserOffset));
    });
  }

  return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
}

#ifndef NDEBUG
/// BBIsJumpedOver - Return true if the specified basic block's only predecessor
/// unconditionally branches to its only successor.
static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
    return false;

  MachineBasicBlock *Succ = *MBB->succ_begin();
  MachineBasicBlock *Pred = *MBB->pred_begin();
  MachineInstr *PredMI = &Pred->back();
  if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
      || PredMI->getOpcode() == ARM::t2B)
    return PredMI->getOperand(0).getMBB() == Succ;
  return false;
}
#endif // NDEBUG

void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
  unsigned BBNum = BB->getNumber();
  for (unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
    // Get the offset and known bits at the end of the layout predecessor.
    // Include the alignment of the current block.
    unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
    unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
    unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);

    // This is where block i begins.  Stop if the offset is already correct,
    // and we have updated 2 blocks.  This is the maximum number of blocks
    // changed before calling this function.
    if (i > BBNum + 2 &&
        BBInfo[i].Offset == Offset &&
        BBInfo[i].KnownBits == KnownBits)
      break;

    BBInfo[i].Offset = Offset;
    BBInfo[i].KnownBits = KnownBits;
  }
}

/// decrementCPEReferenceCount - find the constant pool entry with index CPI
/// and instruction CPEMI, and decrement its refcount.  If the refcount
/// becomes 0 remove the entry and instruction.  Returns true if we removed
/// the entry, false if we didn't.
bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
                                                    MachineInstr *CPEMI) {
  // Find the old entry. Eliminate it if it is no longer used.
  CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
  assert(CPE && "Unexpected!");
  if (--CPE->RefCount == 0) {
    removeDeadCPEMI(CPEMI);
    CPE->CPEMI = NULL;
    --NumCPEs;
    return true;
  }
  return false;
}

/// findInRangeCPEntry - see if the currently referenced CPE is in range;
/// if not, see if an in-range clone of the CPE already exists, and if so,
/// change the data structures so the user references the clone.  Returns:
/// 0 = no existing entry found
/// 1 = entry found, and there were no code insertions or deletions
/// 2 = entry found, and there were code insertions or deletions
int ARMConstantIslands::findInRangeCPEntry(CPUser &U, unsigned UserOffset) {
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI  = U.CPEMI;

  // Check to see if the CPE is already in-range.
  if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
                       true)) {
    DEBUG(dbgs() << "In range\n");
    return 1;
  }

  // No.  Look for previously created clones of the CPE that are in range.
  unsigned CPI = CPEMI->getOperand(1).getIndex();
  std::vector<CPEntry> &CPEs = CPEntries[CPI];
  for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
    // We already tried this one.
    if (CPEs[i].CPEMI == CPEMI)
      continue;
    // Removing CPEs can leave empty entries, skip them.
    if (CPEs[i].CPEMI == NULL)
      continue;
    if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
                     U.NegOk)) {
      DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
                   << CPEs[i].CPI << "\n");
      // Point the CPUser node to the replacement.
      U.CPEMI = CPEs[i].CPEMI;
      // Change the CPI in the instruction operand to refer to the clone.
      for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
        if (UserMI->getOperand(j).isCPI()) {
          UserMI->getOperand(j).setIndex(CPEs[i].CPI);
          break;
        }
      // Adjust the refcount of the clone...
      CPEs[i].RefCount++;
      // ...and the original.  If we didn't remove the old entry, none of the
      // addresses changed, so we don't need another pass.
      return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
    }
  }
  return 0;
}

/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
/// the specific unconditional branch instruction.
static inline unsigned getUnconditionalBrDisp(int Opc) {
  switch (Opc) {
  case ARM::tB:
    return ((1<<10)-1)*2;
  case ARM::t2B:
    return ((1<<23)-1)*2;
  default:
    break;
  }

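  // Default: ARM::B, with a 24-bit signed immediate scaled by 4.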
  return ((1<<23)-1)*4;
}

/// findAvailableWater - Look for an existing entry in the WaterList in which
/// we can place the CPE referenced from U so it's within range of U's MI.
/// Returns true if found, false if not.  If it returns true, WaterIter
/// is set to the WaterList entry.  For Thumb, prefer water that will not
/// introduce padding over water that will.  To ensure that this pass
/// terminates, the CPE location for a particular CPUser is only allowed to
/// move to a lower address, so search backward from the end of the list and
/// prefer the first water that is in range.
bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
                                      water_iterator &WaterIter) {
  if (WaterList.empty())
    return false;

  unsigned BestGrowth = ~0u;
  for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
       --IP) {
    MachineBasicBlock *WaterBB = *IP;
    // Check if water is in range and is either at a lower address than the
    // current "high water mark" or a new water block that was created since
    // the previous iteration by inserting an unconditional branch.  In the
    // latter case, we want to allow resetting the high water mark back to
    // this new water since we haven't seen it before.  Inserting branches
    // should be relatively uncommon and when it does happen, we want to be
    // sure to take advantage of it for all the CPEs near that block, so that
    // we don't insert more branches than necessary.
    unsigned Growth;
    if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
        (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
         NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
      // This is the least amount of required padding seen so far.
      BestGrowth = Growth;
      WaterIter = IP;
      DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
                   << " Growth=" << Growth << '\n');

      // Keep looking unless it is perfect.
      if (BestGrowth == 0)
        return true;
    }
    if (IP == B)
      break;
  }
  return BestGrowth != ~0u;
}

/// createNewWater - No existing WaterList entry will work for
/// CPUsers[CPUserIndex], so create a place to put the CPE.  The end of the
/// block is used if in range, and the conditional branch munged so control
/// flow is correct.  Otherwise the block is split to create a hole with an
/// unconditional branch around it.  In either case NewMBB is set to a
/// block following which the new island can be inserted (the WaterList
/// is not adjusted).
void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
                                        unsigned UserOffset,
                                        MachineBasicBlock *&NewMBB) {
  CPUser &U = CPUsers[CPUserIndex];
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI  = U.CPEMI;
  unsigned CPELogAlign = getCPELogAlign(CPEMI);
  MachineBasicBlock *UserMBB = UserMI->getParent();
  const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];

  // If the block does not end in an unconditional branch already, and if the
  // end of the block is within range, make new water there.  (The addition
  // below is for the unconditional branch we will be adding: 4 bytes on ARM +
  // Thumb2, 2 on Thumb1.)
  if (BBHasFallthrough(UserMBB)) {
    // Size of branch to insert.
    unsigned Delta = isThumb1 ? 2 : 4;
    // End of UserBlock after adding a branch.
    unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
    // Compute the offset where the CPE will begin.
    unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
                                        UserBBI.postKnownBits());

    if (isOffsetInRange(UserOffset, CPEOffset, U)) {
      DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
            << format(", expected CPE offset %#x\n", CPEOffset));
      NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
      // Add an unconditional branch from UserMBB to fallthrough block.  Record
      // it for branch lengthening; this new branch will not get out of range,
      // but if the preceding conditional branch is out of range, the targets
      // will be exchanged, and the altered branch may be out of range, so the
      // machinery has to know about it.
      int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
      if (!isThumb)
        BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
      else
        BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
          .addImm(ARMCC::AL).addReg(0);
      unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
      ImmBranches.push_back(ImmBranch(&UserMBB->back(),
                                      MaxDisp, false, UncondBr));
      BBInfo[UserMBB->getNumber()].Size += Delta;
      adjustBBOffsetsAfter(UserMBB);
      return;
    }
  }
1277
1278  // What a big block.  Find a place within the block to split it.  This is a
1279  // little tricky on Thumb1 since instructions are 2 bytes and constant pool
1280  // entries are 4 bytes: if instruction I references island CPE, and
1281  // instruction I+1 references CPE', it will not work well to put CPE as far
1282  // forward as possible, since then CPE' cannot immediately follow it (that
1283  // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
1284  // need to create a new island.  So, we make a first guess, then walk through
1285  // the instructions between the one currently being looked at and the
1286  // possible insertion point, and make sure any other instructions that
1287  // reference CPEs will be able to use the same island area; if not, we back
1288  // up the insertion point.
1289
1290  // Try to split the block so it's fully aligned.  Compute the latest split
1291  // point where we can add a 4-byte branch instruction, and then
1292  // WorstCaseAlign to LogAlign.
1293  unsigned LogAlign = MF->getAlignment();
1294  assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
1295  unsigned KnownBits = UserBBI.internalKnownBits();
1296  unsigned UPad = UnknownPadding(LogAlign, KnownBits);
1297  unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
1298  DEBUG(dbgs() << format("Split in middle of big block before %#x",
1299                         BaseInsertOffset));
1300
1301  // Account for alignment and unknown padding.
1302  BaseInsertOffset &= ~((1u << LogAlign) - 1);
1303  BaseInsertOffset -= UPad;
1304
1305  // The 4 in the following is for the unconditional branch we'll be inserting
1306  // (allows for long branch on Thumb1).  Alignment of the island is handled
1307  // inside isOffsetInRange.
1308  BaseInsertOffset -= 4;
1309
1310  DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
1311               << " la=" << LogAlign
1312               << " kb=" << KnownBits
1313               << " up=" << UPad << '\n');
1314
1315  // This could point off the end of the block if we've already got constant
1316  // pool entries following this block; only the last one is in the water list.
1317  // Back past any possible branches (allow for a conditional and a maximally
1318  // long unconditional).
1319  if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
1320    BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
1321      (isThumb1 ? 6 : 8);
1322  unsigned EndInsertOffset =
1323    WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
1324    CPEMI->getOperand(2).getImm();
1325  MachineBasicBlock::iterator MI = UserMI;
1326  ++MI;
1327  unsigned CPUIndex = CPUserIndex+1;
1328  unsigned NumCPUsers = CPUsers.size();
1329  MachineInstr *LastIT = 0;
1330  for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
1331       Offset < BaseInsertOffset;
1332       Offset += TII->GetInstSizeInBytes(MI),
1333       MI = llvm::next(MI)) {
1334    if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
1335      CPUser &U = CPUsers[CPUIndex];
1336      if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
1337        // Shift insertion point by one unit of alignment so it is within reach.
1338        BaseInsertOffset -= 1u << LogAlign;
1339        EndInsertOffset  -= 1u << LogAlign;
1340      }
1341      // This is overly conservative, as we don't account for CPEMIs being
1342      // reused within the block, but it doesn't matter much.  Also assume CPEs
1343      // are added in order with alignment padding.  We may eventually be able
1344      // to pack the aligned CPEs better.
1345      EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
1346                                           1u << getCPELogAlign(U.CPEMI)) +
1347        U.CPEMI->getOperand(2).getImm();
1348      CPUIndex++;
1349    }
1350
1351    // Remember the last IT instruction.
1352    if (MI->getOpcode() == ARM::t2IT)
1353      LastIT = MI;
1354  }
1355
1356  --MI;
1357
1358  // Avoid splitting an IT block.
1359  if (LastIT) {
1360    unsigned PredReg = 0;
1361    ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
1362    if (CC != ARMCC::AL)
1363      MI = LastIT;
1364  }
1365  NewMBB = splitBlockBeforeInstr(MI);
1366}
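
// The offset arithmetic in createNewWater is subtle, so here is a worked
// example with made-up numbers.  This is an illustrative sketch only (kept
// out of the build with #if 0), not part of the pass.
#if 0
// Suppose the user is at offset 0x10c with a 0x400-byte max displacement,
// the function is 8-byte aligned (LogAlign = 3), and only the low 2 offset
// bits are known (KnownBits = 2).
unsigned UserOffset = 0x10c, MaxDisp = 0x400;
unsigned LogAlign = 3, KnownBits = 2;
// Worst-case padding from the unknown bits, as in UnknownPadding():
unsigned UPad = (1u << LogAlign) - (1u << KnownBits);        // 8 - 4 = 4
unsigned BaseInsertOffset = UserOffset + MaxDisp;            // 0x50c
BaseInsertOffset &= ~((1u << LogAlign) - 1);                 // down to 0x508
BaseInsertOffset -= UPad;                                    // 0x504
BaseInsertOffset -= 4;  // room for the unconditional branch -> 0x500
// Whatever the two unknown low bits turn out to be, the island stays in range.
#endif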
1367
1368/// handleConstantPoolUser - Analyze the specified user, checking to see if it
1369/// is out-of-range.  If so, pick up the constant pool value and move it some
1370/// place in-range.  Return true if we changed any addresses (thus must run
1371/// another pass of branch lengthening), false otherwise.
1372bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
1373  CPUser &U = CPUsers[CPUserIndex];
1374  MachineInstr *UserMI = U.MI;
1375  MachineInstr *CPEMI  = U.CPEMI;
1376  unsigned CPI = CPEMI->getOperand(1).getIndex();
1377  unsigned Size = CPEMI->getOperand(2).getImm();
1378  // Compute this only once; it's expensive.
1379  unsigned UserOffset = getUserOffset(U);
1380
1381  // See if the current entry is within range, or there is a clone of it
1382  // in range.
1383  int result = findInRangeCPEntry(U, UserOffset);
1384  if (result == 1) return false;
1385  if (result == 2) return true;
1386
1387  // No existing clone of this CPE is within range.
1388  // We will be generating a new clone.  Get a UID for it.
1389  unsigned ID = AFI->createPICLabelUId();
1390
1391  // Look for water where we can place this CPE.
1392  MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
1393  MachineBasicBlock *NewMBB;
1394  water_iterator IP;
1395  if (findAvailableWater(U, UserOffset, IP)) {
1396    DEBUG(dbgs() << "Found water in range\n");
1397    MachineBasicBlock *WaterBB = *IP;
1398
1399    // If the original WaterList entry was "new water" on this iteration,
1400    // propagate that to the new island.  This is just keeping NewWaterList
1401    // updated to match the WaterList, which will be updated below.
1402    if (NewWaterList.count(WaterBB)) {
1403      NewWaterList.erase(WaterBB);
1404      NewWaterList.insert(NewIsland);
1405    }
1406    // The new CPE goes before the following block (NewMBB).
1407    NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
1408
1409  } else {
1410    // No water found.
1411    DEBUG(dbgs() << "No water found\n");
1412    createNewWater(CPUserIndex, UserOffset, NewMBB);
1413
1414    // splitBlockBeforeInstr adds to WaterList, which is important when it is
1415    // called while handling branches so that the water will be seen on the
1416    // next iteration for constant pools, but in this context, we don't want
1417    // it.  Check for this so it will be removed from the WaterList.
1418    // Also remove any entry from NewWaterList.
1419    MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
1420    IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
1421    if (IP != WaterList.end())
1422      NewWaterList.erase(WaterBB);
1423
1424    // We are adding new water.  Update NewWaterList.
1425    NewWaterList.insert(NewIsland);
1426  }
1427
1428  // Remove the original WaterList entry; we want subsequent insertions in
1429  // this vicinity to go after the one we're about to insert.  This
1430  // considerably reduces the number of times we have to move the same CPE
1431  // more than once and is also important to ensure the algorithm terminates.
1432  if (IP != WaterList.end())
1433    WaterList.erase(IP);
1434
1435  // Okay, we know we can put an island before NewMBB now, do it!
1436  MF->insert(NewMBB, NewIsland);
1437
1438  // Update internal data structures to account for the newly inserted MBB.
1439  updateForInsertedWaterBlock(NewIsland);
1440
1441  // Decrement the old entry, and remove it if refcount becomes 0.
1442  decrementCPEReferenceCount(CPI, CPEMI);
1443
1444  // Now that we have an island to add the CPE to, clone the original CPE and
1445  // add it to the island.
1446  U.HighWaterMark = NewIsland;
1447  U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
1448                .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
1449  CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
1450  ++NumCPEs;
1451
1452  // Mark the basic block as aligned as required by the const-pool entry.
1453  NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
1454
1455  // Increase the size of the island block to account for the new entry.
1456  BBInfo[NewIsland->getNumber()].Size += Size;
1457  adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
1458
1459  // Finally, change the CPI in the instruction operand to be ID.
1460  for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
1461    if (UserMI->getOperand(i).isCPI()) {
1462      UserMI->getOperand(i).setIndex(ID);
1463      break;
1464    }
1465
1466  DEBUG(dbgs() << "  Moved CPE to #" << ID << " CPI=" << CPI
1467        << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
1468
1469  return true;
1470}
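
// handleConstantPoolUser returning true means offsets moved, so the driver
// in runOnMachineFunction iterates to a fixed point.  A simplified sketch of
// that loop (illustrative only; the real driver also interleaves branch
// fixups and caps the iteration count):
#if 0
bool CPChange;
do {
  CPChange = false;
  for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
    CPChange |= handleConstantPoolUser(i);
} while (CPChange);
#endif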
1471
1472/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
1473/// sizes and offsets of impacted basic blocks.
1474void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
1475  MachineBasicBlock *CPEBB = CPEMI->getParent();
1476  unsigned Size = CPEMI->getOperand(2).getImm();
1477  CPEMI->eraseFromParent();
1478  BBInfo[CPEBB->getNumber()].Size -= Size;
1479  // All succeeding offsets have the current size value added in; fix this.
1480  if (CPEBB->empty()) {
1481    BBInfo[CPEBB->getNumber()].Size = 0;
1482
1483    // This block no longer needs to be aligned. <rdar://problem/10534709>.
1484    CPEBB->setAlignment(0);
1485  } else
1486    // Entries are sorted by descending alignment, so realign from the front.
1487    CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
1488
1489  adjustBBOffsetsAfter(CPEBB);
1490  // An island has only one predecessor BB and one successor BB. Check if
1491  // this BB's predecessor jumps directly to this BB's successor. This
1492  // shouldn't happen currently.
1493  assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
1494  // FIXME: remove the empty blocks after all the work is done?
1495}
1496
1497/// removeUnusedCPEntries - Remove constant pool entries whose refcounts
1498/// are zero.
1499bool ARMConstantIslands::removeUnusedCPEntries() {
1500  bool MadeChange = false;
1501  for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
1502    std::vector<CPEntry> &CPEs = CPEntries[i];
1503    for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
1504      if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
1505        removeDeadCPEMI(CPEs[j].CPEMI);
1506        CPEs[j].CPEMI = NULL;
1507        MadeChange = true;
1508      }
1509    }
1510  }
1511  return MadeChange;
1512}
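
// Dead entries are nulled rather than erased so that positions within
// CPEntries[CPI] stay stable for code that walks the clone vector by index,
// e.g. the findConstPoolEntry lookup earlier in this file.  Illustrative
// sketch only:
#if 0
std::vector<CPEntry> &CPEs = CPEntries[CPI];
for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j)
  if (CPEs[j].CPEMI == CPEMI)   // index j must remain meaningful
    return &CPEs[j];
#endif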
1513
1514/// isBBInRange - Returns true if the distance between the specified MI and
1515/// the specified BB can fit in MI's displacement field.
1516bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
1517                                     unsigned MaxDisp) {
1518  unsigned PCAdj      = isThumb ? 4 : 8;
1519  unsigned BrOffset   = getOffsetOf(MI) + PCAdj;
1520  unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
1521
1522  DEBUG(dbgs() << "Branch to destination BB#" << DestBB->getNumber()
1523               << " from BB#" << MI->getParent()->getNumber()
1524               << " max delta=" << MaxDisp
1525               << " from " << getOffsetOf(MI) << " to " << DestOffset
1526               << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
1527
1528  if (BrOffset <= DestOffset) {
1529    // Branch before the Dest.
1530    if (DestOffset-BrOffset <= MaxDisp)
1531      return true;
1532  } else {
1533    if (BrOffset-DestOffset <= MaxDisp)
1534      return true;
1535  }
1536  return false;
1537}
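
// A worked example of the PC adjustment above, with made-up offsets
// (illustrative only): on ARM the PC reads as the branch address + 8, on
// Thumb + 4, so the displacement is measured from past the branch.
#if 0
unsigned BrOffset   = 0x100 + 4;          // Thumb branch at 0x100
unsigned DestOffset = 0x200;              // forward destination
bool InRange = (DestOffset - BrOffset) <= /*MaxDisp=*/254;  // 0xfc <= 254: yes
#endif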
1538
1539/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
1540/// away to fit in its displacement field.
1541bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
1542  MachineInstr *MI = Br.MI;
1543  MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
1544
1545  // Check to see if the DestBB is already in-range.
1546  if (isBBInRange(MI, DestBB, Br.MaxDisp))
1547    return false;
1548
1549  if (!Br.isCond)
1550    return fixupUnconditionalBr(Br);
1551  return fixupConditionalBr(Br);
1552}
1553
1554/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
1555/// too far away to fit in its displacement field. If the LR register has been
1556/// spilled in the epilogue, then we can use BL to implement a far jump.
1557/// Otherwise, an intermediate branch block would be needed to reach it.
1558bool
1559ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
1560  MachineInstr *MI = Br.MI;
1561  MachineBasicBlock *MBB = MI->getParent();
1562  if (!isThumb1)
1563    llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");
1564
1565  // Use BL to implement far jump.
1566  Br.MaxDisp = (1 << 21) * 2;
1567  MI->setDesc(TII->get(ARM::tBfar));
1568  BBInfo[MBB->getNumber()].Size += 2;
1569  adjustBBOffsetsAfter(MBB);
1570  HasFarJump = true;
1571  ++NumUBrFixed;
1572
1573  DEBUG(dbgs() << "  Changed B to long jump " << *MI);
1574
1575  return true;
1576}
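
// The far-jump ranges involved above, for reference (illustrative only):
// tBfar expands to a 4-byte BL-based sequence, 2 bytes larger than the
// 2-byte tB it replaces, hence the Size += 2 bookkeeping.
#if 0
unsigned tBMaxDisp    = ((1 << 10) - 1) * 2;  // tB:    ~2KB (11-bit, scaled by 2)
unsigned tBfarMaxDisp = (1 << 21) * 2;        // tBfar: 4MB  (BL range)
#endif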
1577
1578/// fixupConditionalBr - Fix up a conditional branch whose destination is too
1579/// far away to fit in its displacement field. It is converted to an inverse
1580/// conditional branch + an unconditional branch to the destination.
1581bool
1582ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
1583  MachineInstr *MI = Br.MI;
1584  MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
1585
1586  // Add an unconditional branch to the destination and invert the branch
1587  // condition to jump over it:
1588  // blt L1
1589  // =>
1590  // bge L2
1591  // b   L1
1592  // L2:
1593  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
1594  CC = ARMCC::getOppositeCondition(CC);
1595  unsigned CCReg = MI->getOperand(2).getReg();
1596
1597  // If the branch is at the end of its MBB and that has a fall-through block,
1598  // direct the updated conditional branch to the fall-through block. Otherwise,
1599  // split the MBB before the next instruction.
1600  MachineBasicBlock *MBB = MI->getParent();
1601  MachineInstr *BMI = &MBB->back();
1602  bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
1603
1604  ++NumCBrFixed;
1605  if (BMI != MI) {
1606    if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
1607        BMI->getOpcode() == Br.UncondBr) {
1608      // Last MI in the BB is an unconditional branch. Can we simply invert the
1609      // condition and swap destinations:
1610      // beq L1
1611      // b   L2
1612      // =>
1613      // bne L2
1614      // b   L1
1615      MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
1616      if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
1617        DEBUG(dbgs() << "  Invert Bcc condition and swap its destination with "
1618                     << *BMI);
1619        BMI->getOperand(0).setMBB(DestBB);
1620        MI->getOperand(0).setMBB(NewDest);
1621        MI->getOperand(1).setImm(CC);
1622        return true;
1623      }
1624    }
1625  }
1626
1627  if (NeedSplit) {
1628    splitBlockBeforeInstr(MI);
1629    // No need for the branch to the next block. We're adding an unconditional
1630    // branch to the destination.
1631    int delta = TII->GetInstSizeInBytes(&MBB->back());
1632    BBInfo[MBB->getNumber()].Size -= delta;
1633    MBB->back().eraseFromParent();
1634    // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
1635  }
1636  MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
1637
1638  DEBUG(dbgs() << "  Insert B to BB#" << DestBB->getNumber()
1639               << " also invert condition and change dest. to BB#"
1640               << NextBB->getNumber() << "\n");
1641
1642  // Insert a new conditional branch and a new unconditional branch.
1643  // Also update the ImmBranch, and add an entry for the new branch.
1644  BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
1645    .addMBB(NextBB).addImm(CC).addReg(CCReg);
1646  Br.MI = &MBB->back();
1647  BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
1648  if (isThumb)
1649    BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
1650            .addImm(ARMCC::AL).addReg(0);
1651  else
1652    BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
1653  BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
1654  unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
1655  ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
1656
1657  // Remove the old conditional branch.  It may or may not still be in MBB.
1658  BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
1659  MI->eraseFromParent();
1660  adjustBBOffsetsAfter(MBB);
1661  return true;
1662}
1663
1664/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spill
1665/// LR / restore LR to pc. FIXME: This is done here because it's only possible
1666/// to do this if tBfar is not used.
1667bool ARMConstantIslands::undoLRSpillRestore() {
1668  bool MadeChange = false;
1669  for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
1670    MachineInstr *MI = PushPopMIs[i];
1671    // First two operands are predicates.
1672    if (MI->getOpcode() == ARM::tPOP_RET &&
1673        MI->getOperand(2).getReg() == ARM::PC &&
1674        MI->getNumExplicitOperands() == 3) {
1675      // Create the new insn and copy the predicate from the old.
1676      BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
1677        .addOperand(MI->getOperand(0))
1678        .addOperand(MI->getOperand(1));
1679      MI->eraseFromParent();
1680      MadeChange = true;
1681    }
1682  }
1683  return MadeChange;
1684}
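
// Operand layout assumed by the check above (illustrative comment): a
// predicable Thumb1 instruction carries its predicate as operands 0 and 1,
// so for tPOP_RET:
#if 0
MI->getOperand(0);  // predicate condition code (ARMCC::AL when unpredicated)
MI->getOperand(1);  // predicate register (normally no register)
MI->getOperand(2);  // first popped register -- must be PC and must be alone,
                    // so "pop {pc}" qualifies but "pop {r4, pc}" does not.
#endif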
1685
1686// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
1687// below may shrink MI.
1688bool
1689ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const {
1690  switch (MI->getOpcode()) {
1691    // optimizeThumb2Instructions.
1692    case ARM::t2LEApcrel:
1693    case ARM::t2LDRpci:
1694    // optimizeThumb2Branches.
1695    case ARM::t2B:
1696    case ARM::t2Bcc:
1697    case ARM::tBcc:
1698    // optimizeThumb2JumpTables.
1699    case ARM::t2BR_JT:
1700      return true;
1701  }
1702  return false;
1703}
1704
1705bool ARMConstantIslands::optimizeThumb2Instructions() {
1706  bool MadeChange = false;
1707
1708  // Shrink ADR and LDR from constantpool.
1709  for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
1710    CPUser &U = CPUsers[i];
1711    unsigned Opcode = U.MI->getOpcode();
1712    unsigned NewOpc = 0;
1713    unsigned Scale = 1;
1714    unsigned Bits = 0;
1715    switch (Opcode) {
1716    default: break;
1717    case ARM::t2LEApcrel:
1718      if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
1719        NewOpc = ARM::tLEApcrel;
1720        Bits = 8;
1721        Scale = 4;
1722      }
1723      break;
1724    case ARM::t2LDRpci:
1725      if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
1726        NewOpc = ARM::tLDRpci;
1727        Bits = 8;
1728        Scale = 4;
1729      }
1730      break;
1731    }
1732
1733    if (!NewOpc)
1734      continue;
1735
1736    unsigned UserOffset = getUserOffset(U);
1737    unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
1738
1739    // Be conservative with inline asm.
1740    if (!U.KnownAlignment)
1741      MaxOffs -= 2;
1742
1743    // FIXME: Check if offset is multiple of scale if scale is not 4.
1744    if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
1745      U.MI->setDesc(TII->get(NewOpc));
1746      MachineBasicBlock *MBB = U.MI->getParent();
1747      BBInfo[MBB->getNumber()].Size -= 2;
1748      adjustBBOffsetsAfter(MBB);
1749      ++NumT2CPShrunk;
1750      MadeChange = true;
1751    }
1752  }
1753
1754  MadeChange |= optimizeThumb2Branches();
1755  MadeChange |= optimizeThumb2JumpTables();
1756  return MadeChange;
1757}
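
// The shrinking threshold used above, worked through (illustrative only):
// tLDRpci / tLEApcrel take an 8-bit immediate scaled by 4.
#if 0
unsigned Bits = 8, Scale = 4;
unsigned MaxOffs = ((1 << Bits) - 1) * Scale;  // 1020 bytes
// When alignment isn't known (e.g. after inline asm), the real offset may be
// 2 bytes past the estimate, so shrink only up to:
MaxOffs -= 2;                                  // 1018 bytes
#endif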
1758
1759bool ARMConstantIslands::optimizeThumb2Branches() {
1760  bool MadeChange = false;
1761
1762  for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
1763    ImmBranch &Br = ImmBranches[i];
1764    unsigned Opcode = Br.MI->getOpcode();
1765    unsigned NewOpc = 0;
1766    unsigned Scale = 1;
1767    unsigned Bits = 0;
1768    switch (Opcode) {
1769    default: break;
1770    case ARM::t2B:
1771      NewOpc = ARM::tB;
1772      Bits = 11;
1773      Scale = 2;
1774      break;
1775    case ARM::t2Bcc: {
1776      NewOpc = ARM::tBcc;
1777      Bits = 8;
1778      Scale = 2;
1779      break;
1780    }
1781    }
1782    if (NewOpc) {
1783      unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
1784      MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
1785      if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
1786        Br.MI->setDesc(TII->get(NewOpc));
1787        MachineBasicBlock *MBB = Br.MI->getParent();
1788        BBInfo[MBB->getNumber()].Size -= 2;
1789        adjustBBOffsetsAfter(MBB);
1790        ++NumT2BrShrunk;
1791        MadeChange = true;
1792      }
1793    }
1794
1795    Opcode = Br.MI->getOpcode();
1796    if (Opcode != ARM::tBcc)
1797      continue;
1798
1799    // If the conditional branch doesn't kill CPSR, then CPSR can be live
1800    // out of the block, so this transformation is not safe.
1801    if (!Br.MI->killsRegister(ARM::CPSR))
1802      continue;
1803
1804    NewOpc = 0;
1805    unsigned PredReg = 0;
1806    ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
1807    if (Pred == ARMCC::EQ)
1808      NewOpc = ARM::tCBZ;
1809    else if (Pred == ARMCC::NE)
1810      NewOpc = ARM::tCBNZ;
1811    if (!NewOpc)
1812      continue;
1813    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
1814    // Check if the distance is within 126 bytes. Subtract 2 from the
1815    // starting offset because the cmp will be eliminated.
1816    unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2;
1817    unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
1818    if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
1819      MachineBasicBlock::iterator CmpMI = Br.MI;
1820      if (CmpMI != Br.MI->getParent()->begin()) {
1821        --CmpMI;
1822        if (CmpMI->getOpcode() == ARM::tCMPi8) {
1823          unsigned Reg = CmpMI->getOperand(0).getReg();
1824          Pred = getInstrPredicate(CmpMI, PredReg);
1825          if (Pred == ARMCC::AL &&
1826              CmpMI->getOperand(1).getImm() == 0 &&
1827              isARMLowRegister(Reg)) {
1828            MachineBasicBlock *MBB = Br.MI->getParent();
1829            MachineInstr *NewBR =
1830              BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
1831              .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
1832            CmpMI->eraseFromParent();
1833            Br.MI->eraseFromParent();
1834            Br.MI = NewBR;
1835            BBInfo[MBB->getNumber()].Size -= 2;
1836            adjustBBOffsetsAfter(MBB);
1837            ++NumCBZ;
1838            MadeChange = true;
1839          }
1840        }
1841      }
1842    }
1843  }
1844
1845  return MadeChange;
1846}
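
// The CBZ/CBNZ formation above, shown as assembly (illustrative only):
//
//   cmp  r2, #0        @ tCMPi8, 2 bytes -- deleted
//   beq  .LBB0_3       @ tBcc            -- replaced
// =>
//   cbz  r2, .LBB0_3
//
// cbz/cbnz encode a 6-bit immediate scaled by 2 and only branch forward, so
// the destination must lie within 0..126 bytes; the range check above also
// moves the start back 2 bytes to account for the deleted cmp.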
1847
1848/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
1849/// jumptables when it's possible.
1850bool ARMConstantIslands::optimizeThumb2JumpTables() {
1851  bool MadeChange = false;
1852
1853  // FIXME: After the tables are shrunk, can we get rid of some of the
1854  // constantpool tables?
1855  MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
1856  if (MJTI == 0) return false;
1857
1858  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
1859  for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
1860    MachineInstr *MI = T2JumpTables[i];
1861    const MCInstrDesc &MCID = MI->getDesc();
1862    unsigned NumOps = MCID.getNumOperands();
1863    unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
1864    MachineOperand JTOP = MI->getOperand(JTOpIdx);
1865    unsigned JTI = JTOP.getIndex();
1866    assert(JTI < JT.size());
1867
1868    bool ByteOk = true;
1869    bool HalfWordOk = true;
1870    unsigned JTOffset = getOffsetOf(MI) + 4;
1871    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
1872    for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
1873      MachineBasicBlock *MBB = JTBBs[j];
1874      unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
1875      // Negative offset is not ok. FIXME: We should change BB layout to make
1876      // sure all the branches are forward.
1877      if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
1878        ByteOk = false;
1879      unsigned TBHLimit = ((1<<16)-1)*2;
1880      if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
1881        HalfWordOk = false;
1882      if (!ByteOk && !HalfWordOk)
1883        break;
1884    }
1885
1886    if (ByteOk || HalfWordOk) {
1887      MachineBasicBlock *MBB = MI->getParent();
1888      unsigned BaseReg = MI->getOperand(0).getReg();
1889      bool BaseRegKill = MI->getOperand(0).isKill();
1890      if (!BaseRegKill)
1891        continue;
1892      unsigned IdxReg = MI->getOperand(1).getReg();
1893      bool IdxRegKill = MI->getOperand(1).isKill();
1894
1895      // Scan backwards to find the instruction that defines the base
1896      // register. Due to post-RA scheduling, we can't count on it
1897      // immediately preceding the branch instruction.
1898      MachineBasicBlock::iterator PrevI = MI;
1899      MachineBasicBlock::iterator B = MBB->begin();
1900      while (PrevI != B && !PrevI->definesRegister(BaseReg))
1901        --PrevI;
1902
1903      // If for some reason we didn't find it, we can't do anything, so
1904      // just skip this one.
1905      if (!PrevI->definesRegister(BaseReg))
1906        continue;
1907
1908      MachineInstr *AddrMI = PrevI;
1909      bool OptOk = true;
1910      // Examine the instruction that calculates the jumptable entry address.
1911      // Make sure it only defines the base register and kills any uses
1912      // other than the index register.
1913      for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
1914        const MachineOperand &MO = AddrMI->getOperand(k);
1915        if (!MO.isReg() || !MO.getReg())
1916          continue;
1917        if (MO.isDef() && MO.getReg() != BaseReg) {
1918          OptOk = false;
1919          break;
1920        }
1921        if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) {
1922          OptOk = false;
1923          break;
1924        }
1925      }
1926      if (!OptOk)
1927        continue;
1928
1929      // Now scan back again to find the tLEApcrelJT or t2LEApcrelJT
1930      // instruction that gave us the initial base register definition.
1931      for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
1932        ;
1933
1934      // The instruction should be a tLEApcrelJT or t2LEApcrelJT; we want
1935      // to delete it as well.
1936      MachineInstr *LeaMI = PrevI;
1937      if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
1938           LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
1939          LeaMI->getOperand(0).getReg() != BaseReg)
1940        OptOk = false;
1941
1942      if (!OptOk)
1943        continue;
1944
1945      unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
1946      MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
1947        .addReg(IdxReg, getKillRegState(IdxRegKill))
1948        .addJumpTableIndex(JTI, JTOP.getTargetFlags())
1949        .addImm(MI->getOperand(JTOpIdx+1).getImm());
1950      // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
1951      // is 2-byte aligned. For now, asm printer will fix it up.
1952      unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
1953      unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI);
1954      OrigSize += TII->GetInstSizeInBytes(LeaMI);
1955      OrigSize += TII->GetInstSizeInBytes(MI);
1956
1957      AddrMI->eraseFromParent();
1958      LeaMI->eraseFromParent();
1959      MI->eraseFromParent();
1960
1961      int delta = OrigSize - NewSize;
1962      BBInfo[MBB->getNumber()].Size -= delta;
1963      adjustBBOffsetsAfter(MBB);
1964
1965      ++NumTBs;
1966      MadeChange = true;
1967    }
1968  }
1969
1970  return MadeChange;
1971}
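
// Why the limits above are ((1<<8)-1)*2 and ((1<<16)-1)*2 (illustrative
// sketch only): TBB/TBH branch to PC + 2 * entry, where entries are unsigned
// bytes (TBB) or halfwords (TBH), so only forward targets are reachable.
#if 0
unsigned TBBLimit = ((1u << 8)  - 1) * 2;  //    510 bytes forward
unsigned TBHLimit = ((1u << 16) - 1) * 2;  // 131070 bytes forward
#endif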
1972
1973/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that
1974/// jump tables always branch forwards, since that's what tbb and tbh need.
1975bool ARMConstantIslands::reorderThumb2JumpTables() {
1976  bool MadeChange = false;
1977
1978  MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
1979  if (MJTI == 0) return false;
1980
1981  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
1982  for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
1983    MachineInstr *MI = T2JumpTables[i];
1984    const MCInstrDesc &MCID = MI->getDesc();
1985    unsigned NumOps = MCID.getNumOperands();
1986    unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
1987    MachineOperand JTOP = MI->getOperand(JTOpIdx);
1988    unsigned JTI = JTOP.getIndex();
1989    assert(JTI < JT.size());
1990
1991    // We prefer that target blocks for the jump table come after the jump
1992    // instruction so we can use TB[BH]. Loop through the target blocks
1993    // and try to adjust them so that is the case.
1994    int JTNumber = MI->getParent()->getNumber();
1995    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
1996    for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
1997      MachineBasicBlock *MBB = JTBBs[j];
1998      int DTNumber = MBB->getNumber();
1999
2000      if (DTNumber < JTNumber) {
2001        // The destination precedes the switch. Try to move the block forward
2002        // so we have a positive offset.
2003        MachineBasicBlock *NewBB =
2004          adjustJTTargetBlockForward(MBB, MI->getParent());
2005        if (NewBB)
2006          MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
2007        MadeChange = true;
2008      }
2009    }
2010  }
2011
2012  return MadeChange;
2013}
2014
2015MachineBasicBlock *ARMConstantIslands::
2016adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
2017  // If the destination block is terminated by an unconditional branch,
2018  // try to move it; otherwise, create a new block following the jump
2019  // table that branches back to the actual target. This is a very simple
2020  // heuristic. FIXME: We can definitely improve it.
2021  MachineBasicBlock *TBB = 0, *FBB = 0;
2022  SmallVector<MachineOperand, 4> Cond;
2023  SmallVector<MachineOperand, 4> CondPrior;
2024  MachineFunction::iterator BBi = BB;
2025  MachineFunction::iterator OldPrior = prior(BBi);
2026
2027  // If the block terminator isn't analyzable, don't try to move the block.
2028  bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
2029
2030  // If the block ends in an unconditional branch, move it. The prior block
2031  // has to have an analyzable terminator for us to move this one. Be paranoid
2032  // and make sure we're not trying to move the entry block of the function.
2033  if (!B && Cond.empty() && BB != MF->begin() &&
2034      !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
2035    BB->moveAfter(JTBB);
2036    OldPrior->updateTerminator();
2037    BB->updateTerminator();
2038    // Update numbering to account for the block being moved.
2039    MF->RenumberBlocks();
2040    ++NumJTMoved;
2041    return NULL;
2042  }
2043
2044  // Create a new MBB for the code after the jump BB.
2045  MachineBasicBlock *NewBB =
2046    MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
2047  MachineFunction::iterator MBBI = JTBB; ++MBBI;
2048  MF->insert(MBBI, NewBB);
2049
2050  // Add an unconditional branch from NewBB to BB.
2051  // There doesn't seem to be meaningful DebugInfo available; this doesn't
2052  // correspond directly to anything in the source.
2053  assert(isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
2054  BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB)
2055          .addImm(ARMCC::AL).addReg(0);
2056
2057  // Update internal data structures to account for the newly inserted MBB.
2058  MF->RenumberBlocks(NewBB);
2059
2060  // Update the CFG.
2061  NewBB->addSuccessor(BB);
2062  JTBB->removeSuccessor(BB);
2063  JTBB->addSuccessor(NewBB);
2064
2065  ++NumJTInserted;
2066  return NewBB;
2067}
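
// When the target block can't be moved, the trampoline created above looks
// like this (block names hypothetical; illustrative only):
//
//   BB#4:  tbb  [pc, r1]     @ jump table now dispatches forward to BB#5
//   BB#5:  b.w  BB#2         @ NewBB: unconditional hop to the real target
//
// The extra t2B costs 4 bytes but gives the table entry a positive offset,
// which is all TB[BH] can encode.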
2068