ARMConstantIslandPass.cpp revision 034de5f65f9139954cb01d94eaebbaefd294946e
1//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that splits the constant pool up into 'islands'
11// which are scattered through-out the function.  This is required due to the
12// limited pc-relative displacements that ARM has.
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "arm-cp-islands"
17#include "ARM.h"
18#include "ARMAddressingModes.h"
19#include "ARMMachineFunctionInfo.h"
20#include "ARMInstrInfo.h"
21#include "llvm/CodeGen/MachineConstantPool.h"
22#include "llvm/CodeGen/MachineFunctionPass.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineJumpTableInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Support/Compiler.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/ErrorHandling.h"
30#include "llvm/Support/raw_ostream.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/Statistic.h"
34using namespace llvm;
35
// Pass-wide statistics counters (printed with -stats).
STATISTIC(NumCPEs,       "Number of constpool entries");
STATISTIC(NumSplit,      "Number of uncond branches inserted");
STATISTIC(NumCBrFixed,   "Number of cond branches fixed");
STATISTIC(NumUBrFixed,   "Number of uncond branches fixed");
STATISTIC(NumTBs,        "Number of table branches generated");
STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
43
namespace {
  /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
  /// requires constant pool entries to be scattered among the instructions
  /// inside a function.  To do this, it completely ignores the normal LLVM
  /// constant pool; instead, it places constants wherever it feels like with
  /// special instructions.
  ///
  /// The terminology used in this pass includes:
  ///   Islands - Clumps of constants placed in the function.
  ///   Water   - Potential places where an island could be formed.
  ///   CPE     - A constant pool entry that has been placed somewhere, which
  ///             tracks a list of users.
  class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
    /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
    /// by MBB Number.  The two-byte pads required for Thumb alignment are
    /// counted as part of the following block (i.e., the offset and size for
    /// a padded block will both be ==2 mod 4).
    std::vector<unsigned> BBSizes;

    /// BBOffsets - the offset of each MBB in bytes, starting from 0.
    /// The two-byte pads required for Thumb alignment are counted as part of
    /// the following block.
    std::vector<unsigned> BBOffsets;

    /// WaterList - A sorted list of basic blocks where islands could be placed
    /// (i.e. blocks that don't fall through to the following block, due
    /// to a return, unreachable, or unconditional branch).
    std::vector<MachineBasicBlock*> WaterList;

    typedef std::vector<MachineBasicBlock*>::iterator water_iterator;

    /// CPUser - One user of a constant pool, keeping the machine instruction
    /// pointer, the constant pool being referenced, and the max displacement
    /// allowed from the instruction to the CP.
    struct CPUser {
      MachineInstr *MI;     // The instruction referencing the constant pool.
      MachineInstr *CPEMI;  // The CONSTPOOL_ENTRY it currently refers to.
      unsigned MaxDisp;     // Max byte displacement MI's encoding allows.
      bool NegOk;           // True if a negative displacement is encodable.
      bool IsSoImm;         // True if displacement is an ARM shifter-operand imm.
      CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
             bool neg, bool soimm)
        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {}
    };

    /// CPUsers - Keep track of all of the machine instructions that use various
    /// constant pools and their max displacement.
    std::vector<CPUser> CPUsers;

    /// CPEntry - One per constant pool entry, keeping the machine instruction
    /// pointer, the constpool index, and the number of CPUser's which
    /// reference this entry.
    struct CPEntry {
      MachineInstr *CPEMI;  // The CONSTPOOL_ENTRY instruction for this entry.
      unsigned CPI;         // Constant pool index (unique per clone).
      unsigned RefCount;    // Number of CPUsers referencing this entry.
      CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
        : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
    };

    /// CPEntries - Keep track of all of the constant pool entry machine
    /// instructions. For each original constpool index (i.e. those that
    /// existed upon entry to this pass), it keeps a vector of entries.
    /// Original elements are cloned as we go along; the clones are
    /// put in the vector of the original element, but have distinct CPIs.
    std::vector<std::vector<CPEntry> > CPEntries;

    /// ImmBranch - One per immediate branch, keeping the machine instruction
    /// pointer, conditional or unconditional, the max displacement,
    /// and (if isCond is true) the corresponding unconditional branch
    /// opcode.
    struct ImmBranch {
      MachineInstr *MI;       // The branch instruction.
      unsigned MaxDisp : 31;  // Max byte displacement the encoding allows.
      bool isCond : 1;        // True if this is a conditional branch.
      int UncondBr;           // Unconditional opcode to use when isCond.
      ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
        : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
    };

    /// ImmBranches - Keep track of all the immediate branch instructions.
    ///
    std::vector<ImmBranch> ImmBranches;

    /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
    /// Used by UndoLRSpillRestore when the LR spill turns out unnecessary.
    SmallVector<MachineInstr*, 4> PushPopMIs;

    /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
    SmallVector<MachineInstr*, 4> T2JumpTables;

    /// HasFarJump - True if any far jump instruction has been emitted during
    /// the branch fix up pass.
    bool HasFarJump;

    const TargetInstrInfo *TII;  // For instruction sizes and descriptions.
    const ARMSubtarget *STI;     // Target subtarget features.
    ARMFunctionInfo *AFI;        // Per-function ARM state (alignment, UIDs).
    bool isThumb;                // Function is compiled as Thumb (1 or 2).
    bool isThumb1;               // Function is Thumb1-only.
    bool isThumb2;               // Function is Thumb2.
  public:
    static char ID;
    ARMConstantIslands() : MachineFunctionPass(&ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF);

    virtual const char *getPassName() const {
      return "ARM constant island placement and branch shortening pass";
    }

  private:
    // Initial placement and scanning.
    void DoInitialPlacement(MachineFunction &MF,
                            std::vector<MachineInstr*> &CPEMIs);
    CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
    void InitialFunctionScan(MachineFunction &MF,
                             const std::vector<MachineInstr*> &CPEMIs);
    // Block manipulation and bookkeeping.
    MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
    void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
    void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
    // Constant pool entry placement.
    bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
    int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
    bool LookForWater(CPUser&U, unsigned UserOffset,
                      MachineBasicBlock** NewMBB);
    MachineBasicBlock* AcceptWater(MachineBasicBlock *WaterBB,
                                   water_iterator IP);
    void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
                      MachineBasicBlock** NewMBB);
    bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
    void RemoveDeadCPEMI(MachineInstr *CPEMI);
    bool RemoveUnusedCPEntries();
    // Range checks.
    bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
                      MachineInstr *CPEMI, unsigned Disp, bool NegOk,
                      bool DoDump = false);
    bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
                        CPUser &U);
    bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
                         unsigned Disp, bool NegativeOK, bool IsSoImm = false);
    bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
    // Branch fix-up.
    bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
    bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
    bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
    bool UndoLRSpillRestore();
    // Thumb2-specific shrinking optimizations.
    bool OptimizeThumb2Instructions(MachineFunction &MF);
    bool OptimizeThumb2Branches(MachineFunction &MF);
    bool OptimizeThumb2JumpTables(MachineFunction &MF);

    unsigned GetOffsetOf(MachineInstr *MI) const;
    void dumpBBs();
    void verify(MachineFunction &MF);
  };
  char ARMConstantIslands::ID = 0;
}
197
198/// verify - check BBOffsets, BBSizes, alignment of islands
199void ARMConstantIslands::verify(MachineFunction &MF) {
200  assert(BBOffsets.size() == BBSizes.size());
201  for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
202    assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
203  if (!isThumb)
204    return;
205#ifndef NDEBUG
206  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
207       MBBI != E; ++MBBI) {
208    MachineBasicBlock *MBB = MBBI;
209    if (!MBB->empty() &&
210        MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
211      unsigned MBBId = MBB->getNumber();
212      assert((BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
213             (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
214    }
215  }
216#endif
217}
218
219/// print block size and offset information - debugging
220void ARMConstantIslands::dumpBBs() {
221  for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
222    DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
223                 << " size " << BBSizes[J] << "\n");
224  }
225}
226
227/// createARMConstantIslandPass - returns an instance of the constpool
228/// island pass.
229FunctionPass *llvm::createARMConstantIslandPass() {
230  return new ARMConstantIslands();
231}
232
/// runOnMachineFunction - Pass driver: place islands, then iteratively fix
/// up out-of-range branches and constant pool references until stable, then
/// run the Thumb2 shrinking optimizations.  Returns true if the function
/// was modified.
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
  MachineConstantPool &MCP = *MF.getConstantPool();

  TII = MF.getTarget().getInstrInfo();
  AFI = MF.getInfo<ARMFunctionInfo>();
  STI = &MF.getTarget().getSubtarget<ARMSubtarget>();

  isThumb = AFI->isThumbFunction();
  isThumb1 = AFI->isThumb1OnlyFunction();
  isThumb2 = AFI->isThumb2Function();

  HasFarJump = false;

  // Renumber all of the machine basic blocks in the function, guaranteeing that
  // the numbers agree with the position of the block in the function.
  MF.RenumberBlocks();

  // Thumb1 functions containing constant pools get 4-byte alignment.
  // This is so we can keep exact track of where the alignment padding goes.

  // Set default. Thumb1 function is 2-byte aligned, ARM and Thumb2 are 4-byte
  // aligned.  (setAlign takes log2 of the alignment.)
  AFI->setAlign(isThumb1 ? 1U : 2U);

  // Perform the initial placement of the constant pool entries.  To start with,
  // we put them all at the end of the function.
  std::vector<MachineInstr*> CPEMIs;
  if (!MCP.isEmpty()) {
    DoInitialPlacement(MF, CPEMIs);
    // A Thumb1 function with a constant pool gets 4-byte alignment.
    if (isThumb1)
      AFI->setAlign(2U);
  }

  // The next UID to take is the first unused one.
  AFI->initConstPoolEntryUId(CPEMIs.size());

  // Do the initial scan of the function, building up information about the
  // sizes of each block, the location of all the water, and finding all of the
  // constant pool users.
  InitialFunctionScan(MF, CPEMIs);
  CPEMIs.clear();

  // Remove dead constant pool entries.
  RemoveUnusedCPEntries();

  // Iteratively place constant pool entries and fix up branches until there
  // is no change.  Each kind of fix-up can push other references out of
  // range, so alternate between them; bail out if either fails to converge
  // after 30 changing iterations.
  bool MadeChange = false;
  unsigned NoCPIters = 0, NoBRIters = 0;
  while (true) {
    bool CPChange = false;
    for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
      CPChange |= HandleConstantPoolUser(MF, i);
    if (CPChange && ++NoCPIters > 30)
      llvm_unreachable("Constant Island pass failed to converge!");
    DEBUG(dumpBBs());

    bool BRChange = false;
    for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
      BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
    if (BRChange && ++NoBRIters > 30)
      llvm_unreachable("Branch Fix Up pass failed to converge!");
    DEBUG(dumpBBs());

    if (!CPChange && !BRChange)
      break;
    MadeChange = true;
  }

  // Shrink 32-bit Thumb2 branch, load, and store instructions.
  if (isThumb2)
    MadeChange |= OptimizeThumb2Instructions(MF);

  // After a while, this might be made debug-only, but it is not expensive.
  verify(MF);

  // If LR has been forced spilled and no far jumps (i.e. BL) has been issued.
  // Undo the spill / restore of LR if possible.
  if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
    MadeChange |= UndoLRSpillRestore();

  // Release per-function state.
  BBSizes.clear();
  BBOffsets.clear();
  WaterList.clear();
  CPUsers.clear();
  CPEntries.clear();
  ImmBranches.clear();
  PushPopMIs.clear();
  T2JumpTables.clear();

  return MadeChange;
}
325
326/// DoInitialPlacement - Perform the initial placement of the constant pool
327/// entries.  To start with, we put them all at the end of the function.
328void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
329                                        std::vector<MachineInstr*> &CPEMIs) {
330  // Create the basic block to hold the CPE's.
331  MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
332  MF.push_back(BB);
333
334  // Add all of the constants from the constant pool to the end block, use an
335  // identity mapping of CPI's to CPE's.
336  const std::vector<MachineConstantPoolEntry> &CPs =
337    MF.getConstantPool()->getConstants();
338
339  const TargetData &TD = *MF.getTarget().getTargetData();
340  for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
341    unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
342    // Verify that all constant pool entries are a multiple of 4 bytes.  If not,
343    // we would have to pad them out or something so that instructions stay
344    // aligned.
345    assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
346    MachineInstr *CPEMI =
347      BuildMI(BB, DebugLoc::getUnknownLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
348                           .addImm(i).addConstantPoolIndex(i).addImm(Size);
349    CPEMIs.push_back(CPEMI);
350
351    // Add a new CPEntry, but no corresponding CPUser yet.
352    std::vector<CPEntry> CPEs;
353    CPEs.push_back(CPEntry(CPEMI, i));
354    CPEntries.push_back(CPEs);
355    NumCPEs++;
356    DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
357                 << "\n");
358  }
359}
360
361/// BBHasFallthrough - Return true if the specified basic block can fallthrough
362/// into the block immediately after it.
363static bool BBHasFallthrough(MachineBasicBlock *MBB) {
364  // Get the next machine basic block in the function.
365  MachineFunction::iterator MBBI = MBB;
366  if (next(MBBI) == MBB->getParent()->end())  // Can't fall off end of function.
367    return false;
368
369  MachineBasicBlock *NextBB = next(MBBI);
370  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
371       E = MBB->succ_end(); I != E; ++I)
372    if (*I == NextBB)
373      return true;
374
375  return false;
376}
377
378/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
379/// look up the corresponding CPEntry.
380ARMConstantIslands::CPEntry
381*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
382                                        const MachineInstr *CPEMI) {
383  std::vector<CPEntry> &CPEs = CPEntries[CPI];
384  // Number of entries per constpool index should be small, just do a
385  // linear search.
386  for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
387    if (CPEs[i].CPEMI == CPEMI)
388      return &CPEs[i];
389  }
390  return NULL;
391}
392
/// InitialFunctionScan - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
                                 const std::vector<MachineInstr*> &CPEMIs) {
  unsigned Offset = 0;
  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
       MBBI != E; ++MBBI) {
    MachineBasicBlock &MBB = *MBBI;

    // If this block doesn't fall through into the next MBB, then this is
    // 'water' that a constant pool island could be placed.
    if (!BBHasFallthrough(&MBB))
      WaterList.push_back(&MBB);

    unsigned MBBSize = 0;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      // Add instruction size to MBBSize.
      MBBSize += TII->GetInstSizeInBytes(I);

      int Opc = I->getOpcode();
      if (I->getDesc().isBranch()) {
        bool isCond = false;
        unsigned Bits = 0;   // Encoded displacement field width.
        unsigned Scale = 1;  // Bytes per displacement unit.
        int UOpc = Opc;      // Unconditional opcode for cond-branch fixup.
        switch (Opc) {
        default:
          continue;  // Ignore other JT branches
        case ARM::tBR_JTr:
          // A Thumb1 table jump may involve padding; for the offsets to
          // be right, functions containing these must be 4-byte aligned.
          AFI->setAlign(2U);
          if ((Offset+MBBSize)%4 != 0)
            // FIXME: Add a pseudo ALIGN instruction instead.
            MBBSize += 2;           // padding
          continue;   // Does not get an entry in ImmBranches
        case ARM::t2BR_JT:
          // Remembered separately so OptimizeThumb2JumpTables can shrink it.
          T2JumpTables.push_back(I);
          continue;   // Does not get an entry in ImmBranches
        case ARM::Bcc:
          isCond = true;
          UOpc = ARM::B;
          // Fallthrough
        case ARM::B:
          Bits = 24;
          Scale = 4;
          break;
        case ARM::tBcc:
          isCond = true;
          UOpc = ARM::tB;
          Bits = 8;
          Scale = 2;
          break;
        case ARM::tB:
          Bits = 11;
          Scale = 2;
          break;
        case ARM::t2Bcc:
          isCond = true;
          UOpc = ARM::t2B;
          Bits = 20;
          Scale = 2;
          break;
        case ARM::t2B:
          Bits = 24;
          Scale = 2;
          break;
        }

        // Record this immediate branch.  The displacement field is signed,
        // hence the Bits-1.
        unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
        ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
      }

      // Remembered so UndoLRSpillRestore can rewrite them later.
      if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
        PushPopMIs.push_back(I);

      // A CONSTPOOL_ENTRY's constpool operand is not a "user" of the pool.
      if (Opc == ARM::CONSTPOOL_ENTRY)
        continue;

      // Scan the instructions for constant pool operands.
      for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
        if (I->getOperand(op).isCPI()) {
          // We found one.  The addressing mode tells us the max displacement
          // from the PC that this instruction permits.

          // Basic size info comes from the TSFlags field.
          unsigned Bits = 0;
          unsigned Scale = 1;
          bool NegOk = false;
          bool IsSoImm = false;

          switch (Opc) {
          default:
            llvm_unreachable("Unknown addressing mode for CP reference!");
            break;

          // Taking the address of a CP entry.
          case ARM::LEApcrel:
            // This takes a SoImm, which is 8 bit immediate rotated. We'll
            // pretend the maximum offset is 255 * 4. Since each instruction
            // is 4 bytes wide, this is always correct. We'll check for other
            // displacements that fit in a SoImm as well.
            Bits = 8;
            Scale = 4;
            NegOk = true;
            IsSoImm = true;
            break;
          case ARM::t2LEApcrel:
            Bits = 12;
            NegOk = true;
            break;
          case ARM::tLEApcrel:
            Bits = 8;
            Scale = 4;
            break;

          case ARM::LDR:
          case ARM::LDRcp:
          case ARM::t2LDRpci:
            Bits = 12;  // +-offset_12
            NegOk = true;
            break;

          case ARM::tLDRpci:
          case ARM::tLDRcp:
            Bits = 8;
            Scale = 4;  // +(offset_8*4)
            break;

          case ARM::FLDD:
          case ARM::FLDS:
            Bits = 8;
            Scale = 4;  // +-(offset_8*4)
            NegOk = true;
            break;
          }

          // Remember that this is a user of a CP entry.
          unsigned CPI = I->getOperand(op).getIndex();
          MachineInstr *CPEMI = CPEMIs[CPI];
          unsigned MaxOffs = ((1 << Bits)-1) * Scale;
          CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));

          // Increment corresponding CPEntry reference count.
          CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
          assert(CPE && "Cannot find a corresponding CPEntry!");
          CPE->RefCount++;

          // Instructions can only use one CP entry, don't bother scanning the
          // rest of the operands.
          break;
        }
    }

    // In thumb mode, if this block is a constpool island, we may need padding
    // so it's aligned on 4 byte boundary.
    if (isThumb &&
        !MBB.empty() &&
        MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
        (Offset%4) != 0)
      MBBSize += 2;

    BBSizes.push_back(MBBSize);
    BBOffsets.push_back(Offset);
    Offset += MBBSize;
  }
}
563
564/// GetOffsetOf - Return the current offset of the specified machine instruction
565/// from the start of the function.  This offset changes as stuff is moved
566/// around inside the function.
567unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
568  MachineBasicBlock *MBB = MI->getParent();
569
570  // The offset is composed of two things: the sum of the sizes of all MBB's
571  // before this instruction's block, and the offset from the start of the block
572  // it is in.
573  unsigned Offset = BBOffsets[MBB->getNumber()];
574
575  // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
576  // alignment padding, and compensate if so.
577  if (isThumb &&
578      MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
579      Offset%4 != 0)
580    Offset += 2;
581
582  // Sum instructions before MI in MBB.
583  for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
584    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
585    if (&*I == MI) return Offset;
586    Offset += TII->GetInstSizeInBytes(I);
587  }
588}
589
590/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
591/// ID.
592static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
593                              const MachineBasicBlock *RHS) {
594  return LHS->getNumber() < RHS->getNumber();
595}
596
597/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
598/// machine function, it upsets all of the block numbers.  Renumber the blocks
599/// and update the arrays that parallel this numbering.
600void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
601  // Renumber the MBB's to keep them consequtive.
602  NewBB->getParent()->RenumberBlocks(NewBB);
603
604  // Insert a size into BBSizes to align it properly with the (newly
605  // renumbered) block numbers.
606  BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
607
608  // Likewise for BBOffsets.
609  BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
610
611  // Next, update WaterList.  Specifically, we need to add NewMBB as having
612  // available water after it.
613  water_iterator IP =
614    std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
615                     CompareMBBNumbers);
616  WaterList.insert(IP, NewBB);
617}
618
619
/// SplitBlockBeforeInstr - Split the basic block containing MI into two
/// blocks, which are joined by an unconditional branch.  Update
/// datastructures and renumber blocks to account for this change and
/// returns the newly created block.
MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
  MachineBasicBlock *OrigBB = MI->getParent();
  MachineFunction &MF = *OrigBB->getParent();

  // Create a new MBB for the code after the OrigBB.
  MachineBasicBlock *NewBB =
    MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
  MachineFunction::iterator MBBI = OrigBB; ++MBBI;
  MF.insert(MBBI, NewBB);

  // Splice the instructions starting with MI over to NewBB.
  NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());

  // Add an unconditional branch from OrigBB to NewBB.
  // Note the new unconditional branch is not being recorded.
  // There doesn't seem to be meaningful DebugInfo available; this doesn't
  // correspond to anything in the source.
  unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
  BuildMI(OrigBB, DebugLoc::getUnknownLoc(), TII->get(Opc)).addMBB(NewBB);
  NumSplit++;

  // Update the CFG.  All succs of OrigBB are now succs of NewBB.
  while (!OrigBB->succ_empty()) {
    MachineBasicBlock *Succ = *OrigBB->succ_begin();
    OrigBB->removeSuccessor(Succ);
    NewBB->addSuccessor(Succ);

    // This pass should be run after register allocation, so there should be no
    // PHI nodes to update.
    assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI)
           && "PHI nodes should be eliminated by now!");
  }

  // OrigBB branches to NewBB.
  OrigBB->addSuccessor(NewBB);

  // Update internal data structures to account for the newly inserted MBB.
  // This is almost the same as UpdateForInsertedWaterBlock, except that
  // the Water goes after OrigBB, not NewBB.
  MF.RenumberBlocks(NewBB);

  // Insert a size into BBSizes to align it properly with the (newly
  // renumbered) block numbers.
  BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);

  // Likewise for BBOffsets.
  BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);

  // Next, update WaterList.  Specifically, we need to add OrigMBB as having
  // available water after it (but not if it's already there, which happens
  // when splitting before a conditional branch that is followed by an
  // unconditional branch - in that case we want to insert NewBB).
  water_iterator IP =
    std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
                     CompareMBBNumbers);
  MachineBasicBlock* WaterBB = *IP;
  if (WaterBB == OrigBB)
    WaterList.insert(next(IP), NewBB);
  else
    WaterList.insert(IP, OrigBB);

  // Figure out how large the first NewMBB is.  (It cannot
  // contain a constpool_entry or tablejump.)
  unsigned NewBBSize = 0;
  for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
       I != E; ++I)
    NewBBSize += TII->GetInstSizeInBytes(I);

  unsigned OrigBBI = OrigBB->getNumber();
  unsigned NewBBI = NewBB->getNumber();
  // Set the size of NewBB in BBSizes.
  BBSizes[NewBBI] = NewBBSize;

  // We removed instructions from UserMBB, subtract that off from its size.
  // Add 2 or 4 to the block to count the unconditional branch we added to it
  // (Thumb1 tB is 2 bytes; ARM B and Thumb2 t2B are encoded as 4 here).
  int delta = isThumb1 ? 2 : 4;
  BBSizes[OrigBBI] -= NewBBSize - delta;

  // ...and adjust BBOffsets for NewBB accordingly.
  BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];

  // All BBOffsets following these blocks must be modified.
  AdjustBBOffsetsAfter(NewBB, delta);

  return NewBB;
}
709
710/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
711/// reference) is within MaxDisp of TrialOffset (a proposed location of a
712/// constant pool entry).
713bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
714                                         unsigned TrialOffset, unsigned MaxDisp,
715                                         bool NegativeOK, bool IsSoImm) {
716  // On Thumb offsets==2 mod 4 are rounded down by the hardware for
717  // purposes of the displacement computation; compensate for that here.
718  // Effectively, the valid range of displacements is 2 bytes smaller for such
719  // references.
720  unsigned TotalAdj = 0;
721  if (isThumb && UserOffset%4 !=0) {
722    UserOffset -= 2;
723    TotalAdj = 2;
724  }
725  // CPEs will be rounded up to a multiple of 4.
726  if (isThumb && TrialOffset%4 != 0) {
727    TrialOffset += 2;
728    TotalAdj += 2;
729  }
730
731  // In Thumb2 mode, later branch adjustments can shift instructions up and
732  // cause alignment change. In the worst case scenario this can cause the
733  // user's effective address to be subtracted by 2 and the CPE's address to
734  // be plus 2.
735  if (isThumb2 && TotalAdj != 4)
736    MaxDisp -= (4 - TotalAdj);
737
738  if (UserOffset <= TrialOffset) {
739    // User before the Trial.
740    if (TrialOffset - UserOffset <= MaxDisp)
741      return true;
742    // FIXME: Make use full range of soimm values.
743  } else if (NegativeOK) {
744    if (UserOffset - TrialOffset <= MaxDisp)
745      return true;
746    // FIXME: Make use full range of soimm values.
747  }
748  return false;
749}
750
751/// WaterIsInRange - Returns true if a CPE placed after the specified
752/// Water (a basic block) will be in range for the specific MI.
753
754bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
755                                        MachineBasicBlock* Water, CPUser &U) {
756  unsigned MaxDisp = U.MaxDisp;
757  unsigned CPEOffset = BBOffsets[Water->getNumber()] +
758                       BBSizes[Water->getNumber()];
759
760  // If the CPE is to be inserted before the instruction, that will raise
761  // the offset of the instruction.  (Currently applies only to ARM, so
762  // no alignment compensation attempted here.)
763  if (CPEOffset < UserOffset)
764    UserOffset += U.CPEMI->getOperand(2).getImm();
765
766  return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
767}
768
769/// CPEIsInRange - Returns true if the distance between specific MI and
770/// specific ConstPool entry instruction can fit in MI's displacement field.
771bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
772                                      MachineInstr *CPEMI, unsigned MaxDisp,
773                                      bool NegOk, bool DoDump) {
774  unsigned CPEOffset  = GetOffsetOf(CPEMI);
775  assert(CPEOffset%4 == 0 && "Misaligned CPE");
776
777  if (DoDump) {
778    DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
779                 << " max delta=" << MaxDisp
780                 << " insn address=" << UserOffset
781                 << " CPE address=" << CPEOffset
782                 << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
783  }
784
785  return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
786}
787
788#ifndef NDEBUG
789/// BBIsJumpedOver - Return true of the specified basic block's only predecessor
790/// unconditionally branches to its only successor.
791static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
792  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
793    return false;
794
795  MachineBasicBlock *Succ = *MBB->succ_begin();
796  MachineBasicBlock *Pred = *MBB->pred_begin();
797  MachineInstr *PredMI = &Pred->back();
798  if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
799      || PredMI->getOpcode() == ARM::t2B)
800    return PredMI->getOperand(0).getMBB() == Succ;
801  return false;
802}
803#endif // NDEBUG
804
/// AdjustBBOffsetsAfter - Propagate an offset change of 'delta' bytes to all
/// basic blocks laid out after BB.  In Thumb mode, shifting blocks can add or
/// remove the 2-byte alignment padding in front of constant pool entries and
/// after Thumb1 jump tables, so 'delta' itself is adjusted along the way;
/// once the net delta reaches zero the remaining blocks are unaffected and
/// the walk stops early.
void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
                                              int delta) {
  // MBBI tracks the block at index i; both advance together each iteration.
  MachineFunction::iterator MBBI = BB; MBBI = next(MBBI);
  for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
      i < e; ++i) {
    BBOffsets[i] += delta;
    // If some existing blocks have padding, adjust the padding as needed, a
    // bit tricky.  delta can be negative so don't use % on that.
    if (!isThumb)
      continue;
    MachineBasicBlock *MBB = MBBI;
    if (!MBB->empty()) {
      // Constant pool entries require padding.
      if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
        // Compare 4-byte alignment before and after the shift.
        unsigned OldOffset = BBOffsets[i] - delta;
        if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
          // add new padding
          BBSizes[i] += 2;
          delta += 2;
        } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
          // remove existing padding
          BBSizes[i] -= 2;
          delta -= 2;
        }
      }
      // Thumb1 jump tables require padding.  They should be at the end;
      // following unconditional branches are removed by AnalyzeBranch.
      MachineInstr *ThumbJTMI = prior(MBB->end());
      if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
        // The padding sits before the jump table instruction, so check the
        // instruction's own offset rather than the block start.
        unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
        unsigned OldMIOffset = NewMIOffset - delta;
        if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
          // remove existing padding
          BBSizes[i] -= 2;
          delta -= 2;
        } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
          // add new padding
          BBSizes[i] += 2;
          delta += 2;
        }
      }
      // Padding adjustments cancelled the shift; later blocks are unchanged.
      if (delta==0)
        return;
    }
    MBBI = next(MBBI);
  }
}
852
853/// DecrementOldEntry - find the constant pool entry with index CPI
854/// and instruction CPEMI, and decrement its refcount.  If the refcount
855/// becomes 0 remove the entry and instruction.  Returns true if we removed
856/// the entry, false if we didn't.
857
858bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
859  // Find the old entry. Eliminate it if it is no longer used.
860  CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
861  assert(CPE && "Unexpected!");
862  if (--CPE->RefCount == 0) {
863    RemoveDeadCPEMI(CPEMI);
864    CPE->CPEMI = NULL;
865    NumCPEs--;
866    return true;
867  }
868  return false;
869}
870
/// LookForExistingCPEntry - see if the currently referenced CPE is in range;
/// if not, see if an in-range clone of the CPE is already available, and if
/// so, change the data structures so the user references the clone.  Returns:
/// 0 = no existing entry found
/// 1 = entry found, and there were no code insertions or deletions
/// 2 = entry found, and there were code insertions or deletions
int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
{
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI  = U.CPEMI;

  // Check to see if the CPE is already in-range.
  if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
    DEBUG(errs() << "In range\n");
    return 1;
  }

  // No.  Look for previously created clones of the CPE that are in range.
  unsigned CPI = CPEMI->getOperand(1).getIndex();
  std::vector<CPEntry> &CPEs = CPEntries[CPI];
  for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
    // We already tried this one
    if (CPEs[i].CPEMI == CPEMI)
      continue;
    // Removing CPEs can leave empty entries, skip
    if (CPEs[i].CPEMI == NULL)
      continue;
    if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
      DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
                   << CPEs[i].CPI << "\n");
      // Point the CPUser node to the replacement
      U.CPEMI = CPEs[i].CPEMI;
      // Change the CPI in the instruction operand to refer to the clone.
      for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
        if (UserMI->getOperand(j).isCPI()) {
          UserMI->getOperand(j).setIndex(CPEs[i].CPI);
          break;
        }
      // Adjust the refcount of the clone...
      CPEs[i].RefCount++;
      // ...and the original.  If we didn't remove the old entry, none of the
      // addresses changed, so we don't need another pass.
      return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
    }
  }
  return 0;
}
918
919/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
920/// the specific unconditional branch instruction.
921static inline unsigned getUnconditionalBrDisp(int Opc) {
922  switch (Opc) {
923  case ARM::tB:
924    return ((1<<10)-1)*2;
925  case ARM::t2B:
926    return ((1<<23)-1)*2;
927  default:
928    break;
929  }
930
931  return ((1<<23)-1)*4;
932}
933
934/// AcceptWater - Small amount of common code factored out of the following.
935
936MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB,
937                                                   water_iterator IP) {
938  DEBUG(errs() << "found water in range\n");
939  // Remove the original WaterList entry; we want subsequent
940  // insertions in this vicinity to go after the one we're
941  // about to insert.  This considerably reduces the number
942  // of times we have to move the same CPE more than once.
943  WaterList.erase(IP);
944  // CPE goes before following block (NewMBB).
945  return next(MachineFunction::iterator(WaterBB));
946}
947
/// LookForWater - look for an existing entry in the WaterList in which
/// we can place the CPE referenced from U so it's within range of U's MI.
/// Returns true if found, false if not.  If it returns true, *NewMBB
/// is set to the WaterList entry.
/// For ARM, we prefer the water that's farthest away. For Thumb, prefer
/// water that will not introduce padding to water that will; within each
/// group, prefer the water that's farthest away.
bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
                                      MachineBasicBlock** NewMBB) {
  // Fallback candidate: in-range water that would force alignment padding.
  // IPThatWouldPad is deliberately left uninitialized; it is only read
  // when WaterBBThatWouldPad has been set alongside it.
  water_iterator IPThatWouldPad;
  MachineBasicBlock* WaterBBThatWouldPad = NULL;
  if (!WaterList.empty()) {
    // Walk the list back to front so the farthest-away water is tried first.
    for (water_iterator IP = prior(WaterList.end()),
           B = WaterList.begin();; --IP) {
      MachineBasicBlock* WaterBB = *IP;
      if (WaterIsInRange(UserOffset, WaterBB, U)) {
        unsigned WBBId = WaterBB->getNumber();
        if (isThumb &&
            (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
          // This is valid Water, but would introduce padding.  Remember
          // it in case we don't find any Water that doesn't do this.
          if (!WaterBBThatWouldPad) {
            WaterBBThatWouldPad = WaterBB;
            IPThatWouldPad = IP;
          }
        } else {
          // Padding-free (or ARM mode): take this water immediately.
          *NewMBB = AcceptWater(WaterBB, IP);
          return true;
        }
      }
      // Terminate by hand: decrementing IP past begin() is not legal.
      if (IP == B)
        break;
    }
  }
  // No padding-free water found; settle for the padded candidate, if any.
  if (isThumb && WaterBBThatWouldPad) {
    *NewMBB = AcceptWater(WaterBBThatWouldPad, IPThatWouldPad);
    return true;
  }
  return false;
}
988
/// CreateNewWater - No existing WaterList entry will work for
/// CPUsers[CPUserIndex], so create a place to put the CPE.  The end of the
/// block is used if in range, and the conditional branch munged so control
/// flow is correct.  Otherwise the block is split to create a hole with an
/// unconditional branch around it.  In either case *NewMBB is set to a
/// block following which the new island can be inserted (the WaterList
/// is not adjusted).

void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
                        unsigned UserOffset, MachineBasicBlock** NewMBB) {
  CPUser &U = CPUsers[CPUserIndex];
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI  = U.CPEMI;
  MachineBasicBlock *UserMBB = UserMI->getParent();
  // Layout offset of the block that follows UserMBB.
  unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
                               BBSizes[UserMBB->getNumber()];
  assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);

  // If the use is at the end of the block, or the end of the block
  // is within range, make new water there.  (The addition below is
  // for the unconditional branch we will be adding:  4 bytes on ARM + Thumb2,
  // 2 on Thumb1.  Possible Thumb1 alignment padding is allowed for
  // inside OffsetIsInRange.
  // If the block ends in an unconditional branch already, it is water,
  // and is known to be out of range, so we'll always be adding a branch.)
  if (&UserMBB->back() == UserMI ||
      OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
                      U.MaxDisp, U.NegOk, U.IsSoImm)) {
    DEBUG(errs() << "Split at end of block\n");
    if (&UserMBB->back() == UserMI)
      assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
    *NewMBB = next(MachineFunction::iterator(UserMBB));
    // Add an unconditional branch from UserMBB to fallthrough block.
    // Record it for branch lengthening; this new branch will not get out of
    // range, but if the preceding conditional branch is out of range, the
    // targets will be exchanged, and the altered branch may be out of
    // range, so the machinery has to know about it.
    int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
    BuildMI(UserMBB, DebugLoc::getUnknownLoc(),
            TII->get(UncondBr)).addMBB(*NewMBB);
    unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
    ImmBranches.push_back(ImmBranch(&UserMBB->back(),
                          MaxDisp, false, UncondBr));
    // Account for the branch we just appended.
    int delta = isThumb1 ? 2 : 4;
    BBSizes[UserMBB->getNumber()] += delta;
    AdjustBBOffsetsAfter(UserMBB, delta);
  } else {
    // What a big block.  Find a place within the block to split it.
    // This is a little tricky on Thumb1 since instructions are 2 bytes
    // and constant pool entries are 4 bytes: if instruction I references
    // island CPE, and instruction I+1 references CPE', it will
    // not work well to put CPE as far forward as possible, since then
    // CPE' cannot immediately follow it (that location is 2 bytes
    // farther away from I+1 than CPE was from I) and we'd need to create
    // a new island.  So, we make a first guess, then walk through the
    // instructions between the one currently being looked at and the
    // possible insertion point, and make sure any other instructions
    // that reference CPEs will be able to use the same island area;
    // if not, we back up the insertion point.

    // The 4 in the following is for the unconditional branch we'll be
    // inserting (allows for long branch on Thumb1).  Alignment of the
    // island is handled inside OffsetIsInRange.
    unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
    // This could point off the end of the block if we've already got
    // constant pool entries following this block; only the last one is
    // in the water list.  Back past any possible branches (allow for a
    // conditional and a maximally long unconditional).
    if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
      BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
                              (isThumb1 ? 6 : 8);
    // EndInsertOffset is where the island would end once this CPE is added.
    unsigned EndInsertOffset = BaseInsertOffset +
           CPEMI->getOperand(2).getImm();
    MachineBasicBlock::iterator MI = UserMI;
    ++MI;
    // Scan forward over the instructions between the user and the split
    // point; any CPUser found must also reach the prospective island.
    unsigned CPUIndex = CPUserIndex+1;
    for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
         Offset < BaseInsertOffset;
         Offset += TII->GetInstSizeInBytes(MI),
            MI = next(MI)) {
      if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
        CPUser &U = CPUsers[CPUIndex];
        if (!OffsetIsInRange(Offset, EndInsertOffset,
                             U.MaxDisp, U.NegOk, U.IsSoImm)) {
          // Back the insertion point up one instruction so this user can
          // still reach the island.
          BaseInsertOffset -= (isThumb1 ? 2 : 4);
          EndInsertOffset  -= (isThumb1 ? 2 : 4);
        }
        // This is overly conservative, as we don't account for CPEMIs
        // being reused within the block, but it doesn't matter much.
        EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
        CPUIndex++;
      }
    }
    DEBUG(errs() << "Split in middle of big block\n");
    *NewMBB = SplitBlockBeforeInstr(prior(MI));
  }
}
1086
/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range.  If so, pick up the constant pool value and move it some
/// place in-range.  Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
                                                unsigned CPUserIndex) {
  CPUser &U = CPUsers[CPUserIndex];
  MachineInstr *UserMI = U.MI;
  MachineInstr *CPEMI  = U.CPEMI;
  unsigned CPI = CPEMI->getOperand(1).getIndex();
  unsigned Size = CPEMI->getOperand(2).getImm();
  // Set either by LookForWater or CreateNewWater below.
  MachineBasicBlock *NewMBB;
  // Compute this only once, it's expensive.  The 4 or 8 is the value the
  // hardware keeps in the PC.
  unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);

  // See if the current entry is within range, or there is a clone of it
  // in range.
  int result = LookForExistingCPEntry(U, UserOffset);
  if (result==1) return false;
  else if (result==2) return true;

  // No existing clone of this CPE is within range.
  // We will be generating a new clone.  Get a UID for it.
  unsigned ID = AFI->createConstPoolEntryUId();

  // Look for water where we can place this CPE.  We look for the farthest one
  // away that will work.  Forward references only for now (although later
  // we might find some that are backwards).

  if (!LookForWater(U, UserOffset, &NewMBB)) {
    // No water found.
    DEBUG(errs() << "No water found\n");
    CreateNewWater(CPUserIndex, UserOffset, &NewMBB);
  }

  // Okay, we know we can put an island before NewMBB now, do it!
  MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
  MF.insert(NewMBB, NewIsland);

  // Update internal data structures to account for the newly inserted MBB.
  UpdateForInsertedWaterBlock(NewIsland);

  // Decrement the old entry, and remove it if refcount becomes 0.
  DecrementOldEntry(CPI, CPEMI);

  // Now that we have an island to add the CPE to, clone the original CPE and
  // add it to the island.
  U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(),
                    TII->get(ARM::CONSTPOOL_ENTRY))
                .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
  CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
  NumCPEs++;

  // The island starts where NewMBB used to start.
  BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
  // Compensate for .align 2 in thumb mode.
  if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0)
    Size += 2;
  // Increase the size of the island block to account for the new entry.
  BBSizes[NewIsland->getNumber()] += Size;
  AdjustBBOffsetsAfter(NewIsland, Size);

  // Finally, change the CPI in the instruction operand to be ID.
  for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
    if (UserMI->getOperand(i).isCPI()) {
      UserMI->getOperand(i).setIndex(ID);
      break;
    }

  DEBUG(errs() << "  Moved CPE to #" << ID << " CPI=" << CPI
           << '\t' << *UserMI);

  return true;
}
1161
/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
/// sizes and offsets of impacted basic blocks.
void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
  MachineBasicBlock *CPEBB = CPEMI->getParent();
  unsigned Size = CPEMI->getOperand(2).getImm();
  CPEMI->eraseFromParent();
  BBSizes[CPEBB->getNumber()] -= Size;
  // All succeeding offsets have the current size value added in, fix this.
  if (CPEBB->empty()) {
    // In thumb1 mode, the size of island may be padded by two to compensate for
    // the alignment requirement.  Then it will now be 2 when the block is
    // empty, so fix this.
    if (BBSizes[CPEBB->getNumber()] != 0) {
      // Fold the leftover padding into the delta propagated below.
      Size += BBSizes[CPEBB->getNumber()];
      BBSizes[CPEBB->getNumber()] = 0;
    }
  }
  // Note: Size is unsigned, so -Size relies on the usual wraparound
  // conversion producing the intended negative int delta.
  AdjustBBOffsetsAfter(CPEBB, -Size);
  // An island has only one predecessor BB and one successor BB. Check if
  // this BB's predecessor jumps directly to this BB's successor. This
  // shouldn't happen currently.
  assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
  // FIXME: remove the empty blocks after all the work is done?
}
1187
1188/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
1189/// are zero.
1190bool ARMConstantIslands::RemoveUnusedCPEntries() {
1191  unsigned MadeChange = false;
1192  for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
1193      std::vector<CPEntry> &CPEs = CPEntries[i];
1194      for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
1195        if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
1196          RemoveDeadCPEMI(CPEs[j].CPEMI);
1197          CPEs[j].CPEMI = NULL;
1198          MadeChange = true;
1199        }
1200      }
1201  }
1202  return MadeChange;
1203}
1204
1205/// BBIsInRange - Returns true if the distance between specific MI and
1206/// specific BB can fit in MI's displacement field.
1207bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
1208                                     unsigned MaxDisp) {
1209  unsigned PCAdj      = isThumb ? 4 : 8;
1210  unsigned BrOffset   = GetOffsetOf(MI) + PCAdj;
1211  unsigned DestOffset = BBOffsets[DestBB->getNumber()];
1212
1213  DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
1214               << " from BB#" << MI->getParent()->getNumber()
1215               << " max delta=" << MaxDisp
1216               << " from " << GetOffsetOf(MI) << " to " << DestOffset
1217               << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
1218
1219  if (BrOffset <= DestOffset) {
1220    // Branch before the Dest.
1221    if (DestOffset-BrOffset <= MaxDisp)
1222      return true;
1223  } else {
1224    if (BrOffset-DestOffset <= MaxDisp)
1225      return true;
1226  }
1227  return false;
1228}
1229
1230/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
1231/// away to fit in its displacement field.
1232bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
1233  MachineInstr *MI = Br.MI;
1234  MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
1235
1236  // Check to see if the DestBB is already in-range.
1237  if (BBIsInRange(MI, DestBB, Br.MaxDisp))
1238    return false;
1239
1240  if (!Br.isCond)
1241    return FixUpUnconditionalBr(MF, Br);
1242  return FixUpConditionalBr(MF, Br);
1243}
1244
1245/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
1246/// too far away to fit in its displacement field. If the LR register has been
1247/// spilled in the epilogue, then we can use BL to implement a far jump.
1248/// Otherwise, add an intermediate branch instruction to a branch.
1249bool
1250ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
1251  MachineInstr *MI = Br.MI;
1252  MachineBasicBlock *MBB = MI->getParent();
1253  if (!isThumb1)
1254    llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!");
1255
1256  // Use BL to implement far jump.
1257  Br.MaxDisp = (1 << 21) * 2;
1258  MI->setDesc(TII->get(ARM::tBfar));
1259  BBSizes[MBB->getNumber()] += 2;
1260  AdjustBBOffsetsAfter(MBB, 2);
1261  HasFarJump = true;
1262  NumUBrFixed++;
1263
1264  DEBUG(errs() << "  Changed B to long jump " << *MI);
1265
1266  return true;
1267}
1268
/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool
ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
  MachineInstr *MI = Br.MI;
  MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();

  // Add an unconditional branch to the destination and invert the branch
  // condition to jump over it:
  // blt L1
  // =>
  // bge L2
  // b   L1
  // L2:
  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
  CC = ARMCC::getOppositeCondition(CC);
  unsigned CCReg = MI->getOperand(2).getReg();

  // If the branch is at the end of its MBB and that has a fall-through block,
  // direct the updated conditional branch to the fall-through block. Otherwise,
  // split the MBB before the next instruction.
  MachineBasicBlock *MBB = MI->getParent();
  MachineInstr *BMI = &MBB->back();
  bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);

  NumCBrFixed++;
  if (BMI != MI) {
    // MI is not the block terminator; check whether the actual terminator
    // is the immediately following unconditional branch, in which case we
    // may be able to avoid splitting entirely.
    if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
        BMI->getOpcode() == Br.UncondBr) {
      // Last MI in the BB is an unconditional branch. Can we simply invert the
      // condition and swap destinations:
      // beq L1
      // b   L2
      // =>
      // bne L2
      // b   L1
      MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
      if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
        DEBUG(errs() << "  Invert Bcc condition and swap its destination with "
                     << *BMI);
        BMI->getOperand(0).setMBB(DestBB);
        MI->getOperand(0).setMBB(NewDest);
        MI->getOperand(1).setImm(CC);
        // No instructions were added or removed, so no offset updates needed.
        return true;
      }
    }
  }

  if (NeedSplit) {
    SplitBlockBeforeInstr(MI);
    // No need for the branch to the next block. We're adding an unconditional
    // branch to the destination.
    int delta = TII->GetInstSizeInBytes(&MBB->back());
    BBSizes[MBB->getNumber()] -= delta;
    MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB));
    AdjustBBOffsetsAfter(SplitBB, -delta);
    MBB->back().eraseFromParent();
    // BBOffsets[SplitBB] is wrong temporarily, fixed below
  }
  MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));

  DEBUG(errs() << "  Insert B to BB#" << DestBB->getNumber()
               << " also invert condition and change dest. to BB#"
               << NextBB->getNumber() << "\n");

  // Insert a new conditional branch and a new unconditional branch.
  // Also update the ImmBranch as well as adding a new entry for the new branch.
  BuildMI(MBB, DebugLoc::getUnknownLoc(),
          TII->get(MI->getOpcode()))
    .addMBB(NextBB).addImm(CC).addReg(CCReg);
  Br.MI = &MBB->back();
  BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
  BuildMI(MBB, DebugLoc::getUnknownLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
  BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
  unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
  ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));

  // Remove the old conditional branch.  It may or may not still be in MBB.
  BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
  MI->eraseFromParent();

  // The net size change is an addition of one unconditional branch.
  int delta = TII->GetInstSizeInBytes(&MBB->back());
  AdjustBBOffsetsAfter(MBB, delta);
  return true;
}
1356
1357/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills
1358/// LR / restores LR to pc. FIXME: This is done here because it's only possible
1359/// to do this if tBfar is not used.
1360bool ARMConstantIslands::UndoLRSpillRestore() {
1361  bool MadeChange = false;
1362  for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
1363    MachineInstr *MI = PushPopMIs[i];
1364    // First two operands are predicates, the third is a zero since there
1365    // is no writeback.
1366    if (MI->getOpcode() == ARM::tPOP_RET &&
1367        MI->getOperand(3).getReg() == ARM::PC &&
1368        MI->getNumExplicitOperands() == 4) {
1369      BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET));
1370      MI->eraseFromParent();
1371      MadeChange = true;
1372    }
1373  }
1374  return MadeChange;
1375}
1376
1377bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
1378  bool MadeChange = false;
1379
1380  // Shrink ADR and LDR from constantpool.
1381  for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
1382    CPUser &U = CPUsers[i];
1383    unsigned Opcode = U.MI->getOpcode();
1384    unsigned NewOpc = 0;
1385    unsigned Scale = 1;
1386    unsigned Bits = 0;
1387    switch (Opcode) {
1388    default: break;
1389    case ARM::t2LEApcrel:
1390      if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
1391        NewOpc = ARM::tLEApcrel;
1392        Bits = 8;
1393        Scale = 4;
1394      }
1395      break;
1396    case ARM::t2LDRpci:
1397      if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
1398        NewOpc = ARM::tLDRpci;
1399        Bits = 8;
1400        Scale = 4;
1401      }
1402      break;
1403    }
1404
1405    if (!NewOpc)
1406      continue;
1407
1408    unsigned UserOffset = GetOffsetOf(U.MI) + 4;
1409    unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
1410    // FIXME: Check if offset is multiple of scale if scale is not 4.
1411    if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
1412      U.MI->setDesc(TII->get(NewOpc));
1413      MachineBasicBlock *MBB = U.MI->getParent();
1414      BBSizes[MBB->getNumber()] -= 2;
1415      AdjustBBOffsetsAfter(MBB, -2);
1416      ++NumT2CPShrunk;
1417      MadeChange = true;
1418    }
1419  }
1420
1421  MadeChange |= OptimizeThumb2Branches(MF);
1422  MadeChange |= OptimizeThumb2JumpTables(MF);
1423  return MadeChange;
1424}
1425
1426bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
1427  bool MadeChange = false;
1428
1429  for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
1430    ImmBranch &Br = ImmBranches[i];
1431    unsigned Opcode = Br.MI->getOpcode();
1432    unsigned NewOpc = 0;
1433    unsigned Scale = 1;
1434    unsigned Bits = 0;
1435    switch (Opcode) {
1436    default: break;
1437    case ARM::t2B:
1438      NewOpc = ARM::tB;
1439      Bits = 11;
1440      Scale = 2;
1441      break;
1442    case ARM::t2Bcc:
1443      NewOpc = ARM::tBcc;
1444      Bits = 8;
1445      Scale = 2;
1446      break;
1447    }
1448    if (!NewOpc)
1449      continue;
1450
1451    unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
1452    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
1453    if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
1454      Br.MI->setDesc(TII->get(NewOpc));
1455      MachineBasicBlock *MBB = Br.MI->getParent();
1456      BBSizes[MBB->getNumber()] -= 2;
1457      AdjustBBOffsetsAfter(MBB, -2);
1458      ++NumT2BrShrunk;
1459      MadeChange = true;
1460    }
1461  }
1462
1463  return MadeChange;
1464}
1465
1466
/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
  bool MadeChange = false;

  // FIXME: After the tables are shrunk, can we get rid some of the
  // constantpool tables?
  const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
  for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
    MachineInstr *MI = T2JumpTables[i];
    const TargetInstrDesc &TID = MI->getDesc();
    unsigned NumOps = TID.getNumOperands();
    // The jump-table operand sits just before the trailing operands
    // (and before the predicate, when the instruction is predicable).
    unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
    MachineOperand JTOP = MI->getOperand(JTOpIdx);
    unsigned JTI = JTOP.getIndex();
    assert(JTI < JT.size());

    // Determine whether every destination fits in a byte (tbb) or
    // halfword (tbh) table entry.
    bool ByteOk = true;
    bool HalfWordOk = true;
    unsigned JTOffset = GetOffsetOf(MI) + 4;
    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
    for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
      MachineBasicBlock *MBB = JTBBs[j];
      unsigned DstOffset = BBOffsets[MBB->getNumber()];
      // Negative offset is not ok. FIXME: We should change BB layout to make
      // sure all the branches are forward.
      if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
        ByteOk = false;
      unsigned TBHLimit = ((1<<16)-1)*2;
      if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
        HalfWordOk = false;
      if (!ByteOk && !HalfWordOk)
        break;
    }

    if (ByteOk || HalfWordOk) {
      MachineBasicBlock *MBB = MI->getParent();
      unsigned BaseReg = MI->getOperand(0).getReg();
      bool BaseRegKill = MI->getOperand(0).isKill();
      // The base register must die here, or the instructions that
      // computed it cannot be deleted along with the branch.
      if (!BaseRegKill)
        continue;
      unsigned IdxReg = MI->getOperand(1).getReg();
      bool IdxRegKill = MI->getOperand(1).isKill();
      MachineBasicBlock::iterator PrevI = MI;
      if (PrevI == MBB->begin())
        continue;

      MachineInstr *AddrMI = --PrevI;
      bool OptOk = true;
      // Examine the instruction that calculate the jumptable entry address.
      // If it's not the one just before the t2BR_JT, we won't delete it, then
      // it's not worth doing the optimization.
      for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
        const MachineOperand &MO = AddrMI->getOperand(k);
        if (!MO.isReg() || !MO.getReg())
          continue;
        if (MO.isDef() && MO.getReg() != BaseReg) {
          OptOk = false;
          break;
        }
        if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) {
          OptOk = false;
          break;
        }
      }
      if (!OptOk)
        continue;

      // The previous instruction should be a tLEApcrel or t2LEApcrelJT, we want
      // to delete it as well.
      MachineInstr *LeaMI = --PrevI;
      if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
           LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
          LeaMI->getOperand(0).getReg() != BaseReg)
        OptOk = false;

      if (!OptOk)
        continue;

      // Emit the table-branch, preferring the byte form when it reaches.
      unsigned Opc = ByteOk ? ARM::t2TBB : ARM::t2TBH;
      MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
        .addReg(IdxReg, getKillRegState(IdxRegKill))
        .addJumpTableIndex(JTI, JTOP.getTargetFlags())
        .addImm(MI->getOperand(JTOpIdx+1).getImm());
      // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
      // is 2-byte aligned. For now, asm printer will fix it up.
      // Tally the bytes saved by replacing the three old instructions.
      unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
      unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI);
      OrigSize += TII->GetInstSizeInBytes(LeaMI);
      OrigSize += TII->GetInstSizeInBytes(MI);

      AddrMI->eraseFromParent();
      LeaMI->eraseFromParent();
      MI->eraseFromParent();

      // delta is the shrinkage; offsets after this block move back by it.
      int delta = OrigSize - NewSize;
      BBSizes[MBB->getNumber()] -= delta;
      AdjustBBOffsetsAfter(MBB, -delta);

      ++NumTBs;
      MadeChange = true;
    }
  }

  return MadeChange;
}
1574