//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

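// Maps a single load/store opcode plus an addressing submode to the
// corresponding load/store-multiple opcode. As a quick illustration of the
// table below:
//   (ARM::LDRi12, ARM_AM::ia) -> ARM::LDMIA,  i.e. ldr becomes ldmia
//   (ARM::VSTRD,  ARM_AM::db) -> 0, since only VSTMDDB_UPD exists.
// A return value of 0 means there is no non-writeback multiple form.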
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

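// Scale between a word offset and the immediate units of the given Thumb1
// opcode: tLDRi/tSTRi immediates count words, tLDRHi/tSTRHi halfwords, and
// tLDRBi/tSTRBi bytes. For example, one word corresponds to two tLDRHi
// immediate steps, hence the return value of 2 below.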
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
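///
/// As an illustration (registers arbitrary), after a merge that introduced a
/// writeback of two words on r0:
///   ldmia r0!, {r1, r2}
///   ldr   r3, [r0]        ; immediate rewritten from #8 to #0
/// An immediate that would turn negative cannot be rewritten; instead a
/// compensating "subs r0, #8" is inserted and updating stops there.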
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");

  // Start updating any instructions with immediate offsets. Insert a sub before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    if (MBBI->readsRegister(Base)) {
      unsigned Opc = MBBI->getOpcode();
      int Offset;
      bool InsertSub = false;

      if (Opc == ARM::tLDRi  || Opc == ARM::tSTRi  ||
          Opc == ARM::tLDRHi || Opc == ARM::tSTRHi ||
          Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);
        if (Offset >= 0)
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) {
        // SUB/ADD using this register. Merge it with the update.
        // If the merged offset is too large, insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (TL->isLegalAddImmediate(Offset)) {
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

      if (InsertSub) {
        // An instruction above couldn't be updated, so insert a sub.
        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base))
          .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
          .addImm(Pred).addReg(PredReg);
        return;
      }
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // The end of the block was reached. This means register liveness escapes the
  // block, and it's necessary to insert a sub before the last instruction.
  if (MBB.succ_size() > 0)
    // But only insert the SUB if there is actually a successor block.
    // FIXME: Check more carefully if register is live at this point, e.g. by
    // also examining the successor block's register liveness information.
    AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base))
      .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
      .addImm(Pred).addReg(PredReg);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
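///
/// For example (registers arbitrary):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
/// =>
///   ldmia r0, {r1, r2}
/// With Offset == 4 the IB form is used where available; other nonzero
/// offsets are first folded into a new base register, as worked out below.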
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base. But only do so if it is cost effective, i.e. merging more
    // than two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Otherwise, use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      isThumb1 ? ARM::tADDi8  : ARM::ADDri;

    if (Offset < 0) {
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        isThumb1 ? ARM::tSUBi8  : ARM::SUBri;
      Offset = -Offset;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.

    if (isThumb1) {
      if (Base != NewBase) {
        // Need to insert a MOV to the new base first.
        // FIXME: If the immediate fits in 3 bits, use ADD instead.
        BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
          .addReg(Base, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg);
      }
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
        .addReg(NewBase, getKillRegState(true)).addImm(Offset)
        .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }

    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback. Check for this (note that Opcode has already been mapped
  // to the load/store-multiple form above).
  if (Opcode == ARM::tLDMIA && isThumb1)
    for (unsigned I = 0; I < NumRegs; ++I)
      if (Base == Regs[I].first) {
        Writeback = false;
        break;
      }

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    // The base isn't dead after a merged instruction with writeback. Update
    // future uses of the base with the added offset (if possible), or reset
    // the base register as necessary.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));
  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
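///
/// A sketch of the situation this handles: if the memop at RangeBegin
/// implicitly defines a D/Q super-register of its destination, a later memop
/// in the range may still read that super-register. Such reads are collected
/// here so that MergeOpsUpdate can mark them undef once the merge rewrites
/// the definition.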
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually use
  // our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}

// MergeOpsUpdate - Call MergeOps and, on success, update MemOps and Merges
// accordingly.
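// A kill flag on a register whose memop ends up *before* the merged
// instruction has to migrate onto the merged instruction; the KilledRegs /
// Killer bookkeeping below computes that first.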
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // affected uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
                                                   I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
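///
/// Roughly: starting from MemOps[SIndex], extend the run while each next
/// memop is Size bytes further along and uses an ascending register number
/// (strictly consecutive, and within the 16/32-register limit, for VFP),
/// then hand each maximal run to MergeOpsUpdate; e.g. four adjacent ldr's of
/// r1-r4 can become a single ldmia.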
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                         unsigned Base, int Opcode, unsigned Size,
                         ARMCC::CondCodes Pred, unsigned PredReg,
                         unsigned Scratch, MemOpQueue &MemOps,
                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

726

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register
        // number because unaligned Q-register vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

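// The two matchers below recognize an instruction that adjusts Base by
// exactly Bytes under the same predicate, e.g. for Base == r0 and Bytes == 8
// an ARM-mode "sub r0, r0, #8" matches isMatchingDecrement. For the forms
// that can set flags, a live CPSR def blocks the match, since folding the
// update away would drop the flag result.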
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
  // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
  // fallthrough
  case ARM::tADDspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

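// Bytes moved by a memory op: 4 or 8 for the single loads/stores; for the
// multiples, the length of the variadic register list (recovered from the
// operand counts below) times the 4- or 8-byte access size, e.g. an LDMIA
// loading three registers transfers 12 bytes.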
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
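/// e.g. (registers arbitrary):
///
/// ldr r1, [r0]
/// r0 := r0 + 4
/// =>
/// ldr r1, [r0], #4
///
/// r0 := r0 - 4
/// ldr r1, [r0]
/// =>
/// ldr r1, [r0, #-4]!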
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}

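// Decodes the signed byte offset of a load/store from its opcode-specific
// immediate encoding; e.g. a tLDRi immediate of 2 yields +8 (Thumb1 scales
// by 4), while the AM3/AM5 encodings carry an explicit add/sub bit that is
// converted into the sign below.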
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
1453      // If this is a load and base register is killed, it may have been
1454      // re-defed by the load, make sure the first load does not clobber it.
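      // For example (hypothetical registers), "ldrd r2, r0, [r2]" with the
      // base killed is split odd half first so the base in r2 survives for
      // the second load:
      //   ldr r0, [r2, #4]
      //   ldr r2, [r2]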
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
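/// For example (hypothetical registers):
///   ldr r0, [r3]
///   ldr r1, [r3, #4]
///   ldr r2, [r3, #8]
/// =>
///   ldm r3, {r0, r1, r2}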
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates the write-after-write (WAW) dependence. The same
      // applies to str. Try to merge only the inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so that it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  TL = TM.getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
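/// For example (hypothetical registers), moving
///   ldr r1, [r0, #4]
/// up next to
///   ldr r2, [r0]
/// gives the post-allocation pass a chance to merge the pair into an ldm or,
/// with suitable registers, an ldrd.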

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getDataLayout();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
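  // For example, when five or more memory ops were collected
  // (MemRegs.size() >= 5), the move is considered profitable only if the
  // instructions being crossed reference at most MemRegs.size() * 2 registers
  // outside the group.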
  if (MemRegs.size() <= 4)
    // OK if we are only moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
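  // For example, under a typical AAPCS DataLayout i64 is 8-byte aligned, so
  // two word accesses known only to be 4-byte aligned cannot be paired.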
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
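    // Addressing mode 3 packs the add/sub bit together with the offset
    // magnitude, so e.g. an offset of -8 is encoded as getAM3Opc(sub, 8).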
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't move
      // them. We want to limit the increase in register pressure.
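      // For example, a pair of ops (NumMove == 2) is only moved when their
      // location numbers differ by at most 2 * 4 == 8.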
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
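          // For example, steering the two destinations toward a pair such as
          // r0/r1 satisfies the ARM-mode even/odd pairing constraint of ldrd.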
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
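        // For example, a second store to the same [base, #offset] slot must
        // not be rescheduled across the first one, so the scan region is cut
        // here.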
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}